From c3a552f26e44d47bb66fd937981eeb9ce2169da0 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 21 Jan 2025 15:42:30 -0700 Subject: [PATCH] chore: merge comet-parquet-exec branch into main (#1318) --- .../comet/parquet/AbstractColumnReader.java | 14 + .../org/apache/comet/parquet/BatchReader.java | 2 +- .../java/org/apache/comet/parquet/Native.java | 52 + .../comet/parquet/NativeBatchReader.java | 523 ++++ .../comet/parquet/NativeColumnReader.java | 159 ++ .../org/apache/comet/vector/CometVector.java | 2 +- .../scala/org/apache/comet/CometConf.scala | 21 + .../spark/sql/comet/CometArrowUtils.scala | 180 ++ docs/source/contributor-guide/benchmarking.md | 4 + native/Cargo.lock | 5 +- native/Cargo.toml | 2 + native/core/Cargo.toml | 5 +- native/core/src/execution/jni_api.rs | 7 +- native/core/src/execution/operators/filter.rs | 2 +- native/core/src/execution/planner.rs | 204 +- native/core/src/parquet/mod.rs | 227 +- native/core/src/parquet/parquet_support.rs | 2338 +++++++++++++++++ native/core/src/parquet/schema_adapter.rs | 626 +++++ native/core/src/parquet/util/jni.rs | 54 + native/proto/src/proto/operator.proto | 36 + native/spark-expr/src/avg.rs | 341 +++ native/spark-expr/src/avg_decimal.rs | 522 ++++ .../spark-expr/src/conversion_funcs/cast.rs | 34 +- native/spark-expr/src/covariance.rs | 306 +++ native/spark-expr/src/strings.rs | 290 ++ native/spark-expr/src/sum_decimal.rs | 555 ++++ native/spark-expr/src/temporal.rs | 510 ++++ native/spark-expr/src/utils.rs | 41 +- native/spark-expr/src/variance.rs | 247 ++ .../org/apache/comet/CometExecIterator.scala | 1 + .../comet/CometSparkSessionExtensions.scala | 61 +- .../org/apache/comet/DataTypeSupport.scala | 4 +- .../main/scala/org/apache/comet/Native.scala | 2 + .../parquet/CometParquetFileFormat.scala | 64 +- .../apache/comet/serde/QueryPlanSerde.scala | 125 +- .../apache/spark/sql/comet/CometExecRDD.scala | 57 + .../spark/sql/comet/CometNativeScanExec.scala | 131 + .../spark/sql/comet/CometScanExec.scala | 24 +- .../apache/spark/sql/comet/operators.scala | 41 +- .../q1.native_datafusion/explain.txt | 212 ++ .../q1.native_datafusion/simplified.txt | 42 + .../q1.native_iceberg_compat/explain.txt | 229 ++ .../q1.native_iceberg_compat/simplified.txt | 42 + .../q10.native_datafusion/explain.txt | 214 ++ .../q10.native_datafusion/simplified.txt | 54 + .../q10.native_iceberg_compat/explain.txt | 260 ++ .../q10.native_iceberg_compat/simplified.txt | 61 + .../q11.native_datafusion/explain.txt | 331 +++ .../q11.native_datafusion/simplified.txt | 65 + .../q11.native_iceberg_compat/explain.txt | 368 +++ .../q11.native_iceberg_compat/simplified.txt | 67 + .../q12.native_datafusion/explain.txt | 116 + .../q12.native_datafusion/simplified.txt | 26 + .../q12.native_iceberg_compat/explain.txt | 126 + .../q12.native_iceberg_compat/simplified.txt | 26 + .../q13.native_datafusion/explain.txt | 174 ++ .../q13.native_datafusion/simplified.txt | 35 + .../q13.native_iceberg_compat/explain.txt | 193 ++ .../q13.native_iceberg_compat/simplified.txt | 35 + .../q14a.native_datafusion/explain.txt | 474 ++++ .../q14a.native_datafusion/simplified.txt | 96 + .../q14a.native_iceberg_compat/explain.txt | 680 +++++ .../q14a.native_iceberg_compat/simplified.txt | 126 + .../q14b.native_datafusion/explain.txt | 542 ++++ .../q14b.native_datafusion/simplified.txt | 114 + .../q14b.native_iceberg_compat/explain.txt | 639 +++++ .../q14b.native_iceberg_compat/simplified.txt | 122 + .../q15.native_datafusion/explain.txt | 122 + .../q15.native_datafusion/simplified.txt | 25 + .../q15.native_iceberg_compat/explain.txt | 135 + .../q15.native_iceberg_compat/simplified.txt | 25 + .../q16.native_datafusion/explain.txt | 217 ++ .../q16.native_datafusion/simplified.txt | 44 + .../q16.native_iceberg_compat/explain.txt | 233 ++ .../q16.native_iceberg_compat/simplified.txt | 44 + .../q17.native_datafusion/explain.txt | 220 ++ .../q17.native_datafusion/simplified.txt | 44 + .../q17.native_iceberg_compat/explain.txt | 244 ++ .../q17.native_iceberg_compat/simplified.txt | 44 + .../q18.native_datafusion/explain.txt | 215 ++ .../q18.native_datafusion/simplified.txt | 43 + .../q18.native_iceberg_compat/explain.txt | 237 ++ .../q18.native_iceberg_compat/simplified.txt | 43 + .../q19.native_datafusion/explain.txt | 168 ++ .../q19.native_datafusion/simplified.txt | 34 + .../q19.native_iceberg_compat/explain.txt | 198 ++ .../q19.native_iceberg_compat/simplified.txt | 36 + .../q2.native_datafusion/explain.txt | 173 ++ .../q2.native_datafusion/simplified.txt | 35 + .../q2.native_iceberg_compat/explain.txt | 188 ++ .../q2.native_iceberg_compat/simplified.txt | 35 + .../q20.native_datafusion/explain.txt | 116 + .../q20.native_datafusion/simplified.txt | 26 + .../q20.native_iceberg_compat/explain.txt | 126 + .../q20.native_iceberg_compat/simplified.txt | 26 + .../q21.native_datafusion/explain.txt | 127 + .../q21.native_datafusion/simplified.txt | 26 + .../q21.native_iceberg_compat/explain.txt | 140 + .../q21.native_iceberg_compat/simplified.txt | 26 + .../q22.native_datafusion/explain.txt | 127 + .../q22.native_datafusion/simplified.txt | 26 + .../q22.native_iceberg_compat/explain.txt | 140 + .../q22.native_iceberg_compat/simplified.txt | 26 + .../q23a.native_datafusion/explain.txt | 452 ++++ .../q23a.native_datafusion/simplified.txt | 91 + .../q23a.native_iceberg_compat/explain.txt | 484 ++++ .../q23a.native_iceberg_compat/simplified.txt | 91 + .../q23b.native_datafusion/explain.txt | 558 ++++ .../q23b.native_datafusion/simplified.txt | 112 + .../q23b.native_iceberg_compat/explain.txt | 595 +++++ .../q23b.native_iceberg_compat/simplified.txt | 112 + .../q24a.native_datafusion/explain.txt | 382 +++ .../q24a.native_datafusion/simplified.txt | 86 + .../q24a.native_iceberg_compat/explain.txt | 403 +++ .../q24a.native_iceberg_compat/simplified.txt | 86 + .../q24b.native_datafusion/explain.txt | 382 +++ .../q24b.native_datafusion/simplified.txt | 86 + .../q24b.native_iceberg_compat/explain.txt | 403 +++ .../q24b.native_iceberg_compat/simplified.txt | 86 + .../q25.native_datafusion/explain.txt | 209 ++ .../q25.native_datafusion/simplified.txt | 42 + .../q25.native_iceberg_compat/explain.txt | 244 ++ .../q25.native_iceberg_compat/simplified.txt | 44 + .../q26.native_datafusion/explain.txt | 158 ++ .../q26.native_datafusion/simplified.txt | 32 + .../q26.native_iceberg_compat/explain.txt | 174 ++ .../q26.native_iceberg_compat/simplified.txt | 32 + .../q27.native_datafusion/explain.txt | 158 ++ .../q27.native_datafusion/simplified.txt | 32 + .../q27.native_iceberg_compat/explain.txt | 174 ++ .../q27.native_iceberg_compat/simplified.txt | 32 + .../q28.native_datafusion/explain.txt | 407 +++ .../q28.native_datafusion/simplified.txt | 99 + .../q28.native_iceberg_compat/explain.txt | 425 +++ .../q28.native_iceberg_compat/simplified.txt | 99 + .../q29.native_datafusion/explain.txt | 225 ++ .../q29.native_datafusion/simplified.txt | 45 + .../q29.native_iceberg_compat/explain.txt | 263 ++ .../q29.native_iceberg_compat/simplified.txt | 47 + .../q3.native_datafusion/explain.txt | 101 + .../q3.native_datafusion/simplified.txt | 21 + .../q3.native_iceberg_compat/explain.txt | 111 + .../q3.native_iceberg_compat/simplified.txt | 21 + .../q30.native_datafusion/explain.txt | 253 ++ .../q30.native_datafusion/simplified.txt | 50 + .../q30.native_iceberg_compat/explain.txt | 273 ++ .../q30.native_iceberg_compat/simplified.txt | 50 + .../q31.native_datafusion/explain.txt | 310 +++ .../q31.native_datafusion/simplified.txt | 61 + .../q31.native_iceberg_compat/explain.txt | 509 ++++ .../q31.native_iceberg_compat/simplified.txt | 92 + .../q32.native_datafusion/explain.txt | 159 ++ .../q32.native_datafusion/simplified.txt | 32 + .../q32.native_iceberg_compat/explain.txt | 173 ++ .../q32.native_iceberg_compat/simplified.txt | 32 + .../q33.native_datafusion/explain.txt | 258 ++ .../q33.native_datafusion/simplified.txt | 51 + .../q33.native_iceberg_compat/explain.txt | 344 +++ .../q33.native_iceberg_compat/simplified.txt | 63 + .../q34.native_datafusion/explain.txt | 168 ++ .../q34.native_datafusion/simplified.txt | 34 + .../q34.native_iceberg_compat/explain.txt | 184 ++ .../q34.native_iceberg_compat/simplified.txt | 34 + .../q35.native_datafusion/explain.txt | 209 ++ .../q35.native_datafusion/simplified.txt | 53 + .../q35.native_iceberg_compat/explain.txt | 255 ++ .../q35.native_iceberg_compat/simplified.txt | 60 + .../q36.native_datafusion/explain.txt | 152 ++ .../q36.native_datafusion/simplified.txt | 33 + .../q36.native_iceberg_compat/explain.txt | 165 ++ .../q36.native_iceberg_compat/simplified.txt | 33 + .../q37.native_datafusion/explain.txt | 137 + .../q37.native_datafusion/simplified.txt | 28 + .../q37.native_iceberg_compat/explain.txt | 150 ++ .../q37.native_iceberg_compat/simplified.txt | 28 + .../q38.native_datafusion/explain.txt | 143 + .../q38.native_datafusion/simplified.txt | 29 + .../q38.native_iceberg_compat/explain.txt | 266 ++ .../q38.native_iceberg_compat/simplified.txt | 49 + .../q39a.native_datafusion/explain.txt | 246 ++ .../q39a.native_datafusion/simplified.txt | 49 + .../q39a.native_iceberg_compat/explain.txt | 266 ++ .../q39a.native_iceberg_compat/simplified.txt | 49 + .../q39b.native_datafusion/explain.txt | 246 ++ .../q39b.native_datafusion/simplified.txt | 49 + .../q39b.native_iceberg_compat/explain.txt | 266 ++ .../q39b.native_iceberg_compat/simplified.txt | 49 + .../q4.native_datafusion/explain.txt | 262 ++ .../q4.native_datafusion/simplified.txt | 52 + .../q4.native_iceberg_compat/explain.txt | 535 ++++ .../q4.native_iceberg_compat/simplified.txt | 97 + .../q40.native_datafusion/explain.txt | 168 ++ .../q40.native_datafusion/simplified.txt | 34 + .../q40.native_iceberg_compat/explain.txt | 184 ++ .../q40.native_iceberg_compat/simplified.txt | 34 + .../q41.native_datafusion/explain.txt | 102 + .../q41.native_datafusion/simplified.txt | 21 + .../q41.native_iceberg_compat/explain.txt | 108 + .../q41.native_iceberg_compat/simplified.txt | 21 + .../q42.native_datafusion/explain.txt | 101 + .../q42.native_datafusion/simplified.txt | 21 + .../q42.native_iceberg_compat/explain.txt | 111 + .../q42.native_iceberg_compat/simplified.txt | 21 + .../q43.native_datafusion/explain.txt | 101 + .../q43.native_datafusion/simplified.txt | 21 + .../q43.native_iceberg_compat/explain.txt | 111 + .../q43.native_iceberg_compat/simplified.txt | 21 + .../q44.native_datafusion/explain.txt | 261 ++ .../q44.native_datafusion/simplified.txt | 72 + .../q44.native_iceberg_compat/explain.txt | 270 ++ .../q44.native_iceberg_compat/simplified.txt | 72 + .../q45.native_datafusion/explain.txt | 193 ++ .../q45.native_datafusion/simplified.txt | 43 + .../q45.native_iceberg_compat/explain.txt | 212 ++ .../q45.native_iceberg_compat/simplified.txt | 43 + .../q46.native_datafusion/explain.txt | 199 ++ .../q46.native_datafusion/simplified.txt | 40 + .../q46.native_iceberg_compat/explain.txt | 218 ++ .../q46.native_iceberg_compat/simplified.txt | 40 + .../q47.native_datafusion/explain.txt | 243 ++ .../q47.native_datafusion/simplified.txt | 63 + .../q47.native_iceberg_compat/explain.txt | 256 ++ .../q47.native_iceberg_compat/simplified.txt | 63 + .../q48.native_datafusion/explain.txt | 148 ++ .../q48.native_datafusion/simplified.txt | 30 + .../q48.native_iceberg_compat/explain.txt | 164 ++ .../q48.native_iceberg_compat/simplified.txt | 30 + .../q49.native_datafusion/explain.txt | 241 ++ .../q49.native_datafusion/simplified.txt | 69 + .../q49.native_iceberg_compat/explain.txt | 423 +++ .../q49.native_iceberg_compat/simplified.txt | 99 + .../q5.native_datafusion/explain.txt | 277 ++ .../q5.native_datafusion/simplified.txt | 62 + .../q5.native_iceberg_compat/explain.txt | 409 +++ .../q5.native_iceberg_compat/simplified.txt | 80 + .../q50.native_datafusion/explain.txt | 148 ++ .../q50.native_datafusion/simplified.txt | 30 + .../q50.native_iceberg_compat/explain.txt | 165 ++ .../q50.native_iceberg_compat/simplified.txt | 30 + .../q51.native_datafusion/explain.txt | 141 + .../q51.native_datafusion/simplified.txt | 42 + .../q51.native_iceberg_compat/explain.txt | 219 ++ .../q51.native_iceberg_compat/simplified.txt | 59 + .../q52.native_datafusion/explain.txt | 101 + .../q52.native_datafusion/simplified.txt | 21 + .../q52.native_iceberg_compat/explain.txt | 111 + .../q52.native_iceberg_compat/simplified.txt | 21 + .../q53.native_datafusion/explain.txt | 152 ++ .../q53.native_datafusion/simplified.txt | 33 + .../q53.native_iceberg_compat/explain.txt | 165 ++ .../q53.native_iceberg_compat/simplified.txt | 33 + .../q54.native_datafusion/explain.txt | 376 +++ .../q54.native_datafusion/simplified.txt | 78 + .../q54.native_iceberg_compat/explain.txt | 412 +++ .../q54.native_iceberg_compat/simplified.txt | 78 + .../q55.native_datafusion/explain.txt | 101 + .../q55.native_datafusion/simplified.txt | 21 + .../q55.native_iceberg_compat/explain.txt | 111 + .../q55.native_iceberg_compat/simplified.txt | 21 + .../q56.native_datafusion/explain.txt | 258 ++ .../q56.native_datafusion/simplified.txt | 51 + .../q56.native_iceberg_compat/explain.txt | 344 +++ .../q56.native_iceberg_compat/simplified.txt | 63 + .../q57.native_datafusion/explain.txt | 243 ++ .../q57.native_datafusion/simplified.txt | 63 + .../q57.native_iceberg_compat/explain.txt | 256 ++ .../q57.native_iceberg_compat/simplified.txt | 63 + .../q58.native_datafusion/explain.txt | 198 ++ .../q58.native_datafusion/simplified.txt | 42 + .../q58.native_iceberg_compat/explain.txt | 332 +++ .../q58.native_iceberg_compat/simplified.txt | 63 + .../q59.native_datafusion/explain.txt | 205 ++ .../q59.native_datafusion/simplified.txt | 41 + .../q59.native_iceberg_compat/explain.txt | 224 ++ .../q59.native_iceberg_compat/simplified.txt | 41 + .../q6.native_datafusion/explain.txt | 243 ++ .../q6.native_datafusion/simplified.txt | 50 + .../q6.native_iceberg_compat/explain.txt | 265 ++ .../q6.native_iceberg_compat/simplified.txt | 50 + .../q60.native_datafusion/explain.txt | 258 ++ .../q60.native_datafusion/simplified.txt | 51 + .../q60.native_iceberg_compat/explain.txt | 344 +++ .../q60.native_iceberg_compat/simplified.txt | 63 + .../q61.native_datafusion/explain.txt | 320 +++ .../q61.native_datafusion/simplified.txt | 67 + .../q61.native_iceberg_compat/explain.txt | 362 +++ .../q61.native_iceberg_compat/simplified.txt | 70 + .../q62.native_datafusion/explain.txt | 126 + .../q62.native_datafusion/simplified.txt | 26 + .../q62.native_iceberg_compat/explain.txt | 163 ++ .../q62.native_iceberg_compat/simplified.txt | 30 + .../q63.native_datafusion/explain.txt | 152 ++ .../q63.native_datafusion/simplified.txt | 33 + .../q63.native_iceberg_compat/explain.txt | 165 ++ .../q63.native_iceberg_compat/simplified.txt | 33 + .../q64.native_datafusion/explain.txt | 837 ++++++ .../q64.native_datafusion/simplified.txt | 165 ++ .../q64.native_iceberg_compat/explain.txt | 894 +++++++ .../q64.native_iceberg_compat/simplified.txt | 167 ++ .../q65.native_datafusion/explain.txt | 207 ++ .../q65.native_datafusion/simplified.txt | 41 + .../q65.native_iceberg_compat/explain.txt | 224 ++ .../q65.native_iceberg_compat/simplified.txt | 41 + .../q66.native_datafusion/explain.txt | 185 ++ .../q66.native_datafusion/simplified.txt | 37 + .../q66.native_iceberg_compat/explain.txt | 282 ++ .../q66.native_iceberg_compat/simplified.txt | 52 + .../q67.native_datafusion/explain.txt | 162 ++ .../q67.native_datafusion/simplified.txt | 37 + .../q67.native_iceberg_compat/explain.txt | 175 ++ .../q67.native_iceberg_compat/simplified.txt | 37 + .../q68.native_datafusion/explain.txt | 199 ++ .../q68.native_datafusion/simplified.txt | 40 + .../q68.native_iceberg_compat/explain.txt | 218 ++ .../q68.native_iceberg_compat/simplified.txt | 40 + .../q69.native_datafusion/explain.txt | 209 ++ .../q69.native_datafusion/simplified.txt | 53 + .../q69.native_iceberg_compat/explain.txt | 255 ++ .../q69.native_iceberg_compat/simplified.txt | 60 + .../q7.native_datafusion/explain.txt | 158 ++ .../q7.native_datafusion/simplified.txt | 32 + .../q7.native_iceberg_compat/explain.txt | 174 ++ .../q7.native_iceberg_compat/simplified.txt | 32 + .../q70.native_datafusion/explain.txt | 239 ++ .../q70.native_datafusion/simplified.txt | 61 + .../q70.native_iceberg_compat/explain.txt | 256 ++ .../q70.native_iceberg_compat/simplified.txt | 61 + .../q71.native_datafusion/explain.txt | 193 ++ .../q71.native_datafusion/simplified.txt | 39 + .../q71.native_iceberg_compat/explain.txt | 214 ++ .../q71.native_iceberg_compat/simplified.txt | 39 + .../q72.native_datafusion/explain.txt | 323 +++ .../q72.native_datafusion/simplified.txt | 64 + .../q72.native_iceberg_compat/explain.txt | 369 +++ .../q72.native_iceberg_compat/simplified.txt | 66 + .../q73.native_datafusion/explain.txt | 168 ++ .../q73.native_datafusion/simplified.txt | 34 + .../q73.native_iceberg_compat/explain.txt | 184 ++ .../q73.native_iceberg_compat/simplified.txt | 34 + .../q74.native_datafusion/explain.txt | 232 ++ .../q74.native_datafusion/simplified.txt | 46 + .../q74.native_iceberg_compat/explain.txt | 363 +++ .../q74.native_iceberg_compat/simplified.txt | 66 + .../q75.native_datafusion/explain.txt | 443 ++++ .../q75.native_datafusion/simplified.txt | 88 + .../q75.native_iceberg_compat/explain.txt | 681 +++++ .../q75.native_iceberg_compat/simplified.txt | 126 + .../q76.native_datafusion/explain.txt | 177 ++ .../q76.native_datafusion/simplified.txt | 36 + .../q76.native_iceberg_compat/explain.txt | 195 ++ .../q76.native_iceberg_compat/simplified.txt | 36 + .../q77.native_datafusion/explain.txt | 277 ++ .../q77.native_datafusion/simplified.txt | 66 + .../q77.native_iceberg_compat/explain.txt | 471 ++++ .../q77.native_iceberg_compat/simplified.txt | 97 + .../q78.native_datafusion/explain.txt | 250 ++ .../q78.native_datafusion/simplified.txt | 50 + .../q78.native_iceberg_compat/explain.txt | 373 +++ .../q78.native_iceberg_compat/simplified.txt | 69 + .../q79.native_datafusion/explain.txt | 158 ++ .../q79.native_datafusion/simplified.txt | 32 + .../q79.native_iceberg_compat/explain.txt | 174 ++ .../q79.native_iceberg_compat/simplified.txt | 32 + .../q8.native_datafusion/explain.txt | 223 ++ .../q8.native_datafusion/simplified.txt | 44 + .../q8.native_iceberg_compat/explain.txt | 241 ++ .../q8.native_iceberg_compat/simplified.txt | 44 + .../q80.native_datafusion/explain.txt | 374 +++ .../q80.native_datafusion/simplified.txt | 81 + .../q80.native_iceberg_compat/explain.txt | 574 ++++ .../q80.native_iceberg_compat/simplified.txt | 112 + .../q81.native_datafusion/explain.txt | 248 ++ .../q81.native_datafusion/simplified.txt | 49 + .../q81.native_iceberg_compat/explain.txt | 268 ++ .../q81.native_iceberg_compat/simplified.txt | 49 + .../q82.native_datafusion/explain.txt | 137 + .../q82.native_datafusion/simplified.txt | 28 + .../q82.native_iceberg_compat/explain.txt | 150 ++ .../q82.native_iceberg_compat/simplified.txt | 28 + .../q83.native_datafusion/explain.txt | 184 ++ .../q83.native_datafusion/simplified.txt | 37 + .../q83.native_iceberg_compat/explain.txt | 311 +++ .../q83.native_iceberg_compat/simplified.txt | 57 + .../q84.native_datafusion/explain.txt | 167 ++ .../q84.native_datafusion/simplified.txt | 34 + .../q84.native_iceberg_compat/explain.txt | 185 ++ .../q84.native_iceberg_compat/simplified.txt | 34 + .../q85.native_datafusion/explain.txt | 236 ++ .../q85.native_datafusion/simplified.txt | 47 + .../q85.native_iceberg_compat/explain.txt | 261 ++ .../q85.native_iceberg_compat/simplified.txt | 47 + .../q86.native_datafusion/explain.txt | 121 + .../q86.native_datafusion/simplified.txt | 27 + .../q86.native_iceberg_compat/explain.txt | 131 + .../q86.native_iceberg_compat/simplified.txt | 27 + .../q87.native_datafusion/explain.txt | 154 ++ .../q87.native_datafusion/simplified.txt | 36 + .../q87.native_iceberg_compat/explain.txt | 281 ++ .../q87.native_iceberg_compat/simplified.txt | 59 + .../q88.native_datafusion/explain.txt | 873 ++++++ .../q88.native_datafusion/simplified.txt | 195 ++ .../q88.native_iceberg_compat/explain.txt | 927 +++++++ .../q88.native_iceberg_compat/simplified.txt | 195 ++ .../q89.native_datafusion/explain.txt | 147 ++ .../q89.native_datafusion/simplified.txt | 32 + .../q89.native_iceberg_compat/explain.txt | 160 ++ .../q89.native_iceberg_compat/simplified.txt | 32 + .../q9.native_datafusion/explain.txt | 265 ++ .../q9.native_datafusion/simplified.txt | 71 + .../q9.native_iceberg_compat/explain.txt | 283 ++ .../q9.native_iceberg_compat/simplified.txt | 71 + .../q90.native_datafusion/explain.txt | 242 ++ .../q90.native_datafusion/simplified.txt | 52 + .../q90.native_iceberg_compat/explain.txt | 260 ++ .../q90.native_iceberg_compat/simplified.txt | 52 + .../q91.native_datafusion/explain.txt | 215 ++ .../q91.native_datafusion/simplified.txt | 43 + .../q91.native_iceberg_compat/explain.txt | 237 ++ .../q91.native_iceberg_compat/simplified.txt | 43 + .../q92.native_datafusion/explain.txt | 159 ++ .../q92.native_datafusion/simplified.txt | 32 + .../q92.native_iceberg_compat/explain.txt | 173 ++ .../q92.native_iceberg_compat/simplified.txt | 32 + .../q93.native_datafusion/explain.txt | 116 + .../q93.native_datafusion/simplified.txt | 24 + .../q93.native_iceberg_compat/explain.txt | 124 + .../q93.native_iceberg_compat/simplified.txt | 24 + .../q94.native_datafusion/explain.txt | 217 ++ .../q94.native_datafusion/simplified.txt | 44 + .../q94.native_iceberg_compat/explain.txt | 233 ++ .../q94.native_iceberg_compat/simplified.txt | 44 + .../q95.native_datafusion/explain.txt | 282 ++ .../q95.native_datafusion/simplified.txt | 57 + .../q95.native_iceberg_compat/explain.txt | 300 +++ .../q95.native_iceberg_compat/simplified.txt | 57 + .../q96.native_datafusion/explain.txt | 131 + .../q96.native_datafusion/simplified.txt | 27 + .../q96.native_iceberg_compat/explain.txt | 143 + .../q96.native_iceberg_compat/simplified.txt | 27 + .../q97.native_datafusion/explain.txt | 135 + .../q97.native_datafusion/simplified.txt | 27 + .../q97.native_iceberg_compat/explain.txt | 144 + .../q97.native_iceberg_compat/simplified.txt | 27 + .../q98.native_datafusion/explain.txt | 130 + .../q98.native_datafusion/simplified.txt | 31 + .../q98.native_iceberg_compat/explain.txt | 140 + .../q98.native_iceberg_compat/simplified.txt | 31 + .../q99.native_datafusion/explain.txt | 126 + .../q99.native_datafusion/simplified.txt | 26 + .../q99.native_iceberg_compat/explain.txt | 163 ++ .../q99.native_iceberg_compat/simplified.txt | 30 + .../q1.native_datafusion/explain.txt | 239 ++ .../q1.native_datafusion/simplified.txt | 60 + .../q10.native_datafusion/explain.txt | 214 ++ .../q10.native_datafusion/simplified.txt | 54 + .../q11.native_datafusion/explain.txt | 362 +++ .../q11.native_datafusion/simplified.txt | 85 + .../q12.native_datafusion/explain.txt | 120 + .../q12.native_datafusion/simplified.txt | 30 + .../q13.native_datafusion/explain.txt | 178 ++ .../q13.native_datafusion/simplified.txt | 37 + .../q14a.native_datafusion/explain.txt | 487 ++++ .../q14a.native_datafusion/simplified.txt | 108 + .../q15.native_datafusion/explain.txt | 126 + .../q15.native_datafusion/simplified.txt | 27 + .../q16.native_datafusion/explain.txt | 219 ++ .../q16.native_datafusion/simplified.txt | 44 + .../q17.native_datafusion/explain.txt | 224 ++ .../q17.native_datafusion/simplified.txt | 46 + .../q18.native_datafusion/explain.txt | 219 ++ .../q18.native_datafusion/simplified.txt | 45 + .../q19.native_datafusion/explain.txt | 172 ++ .../q19.native_datafusion/simplified.txt | 36 + .../q2.native_datafusion/explain.txt | 194 ++ .../q2.native_datafusion/simplified.txt | 51 + .../q20.native_datafusion/explain.txt | 120 + .../q20.native_datafusion/simplified.txt | 30 + .../q21.native_datafusion/explain.txt | 131 + .../q21.native_datafusion/simplified.txt | 28 + .../q22.native_datafusion/explain.txt | 131 + .../q22.native_datafusion/simplified.txt | 28 + .../q23a.native_datafusion/explain.txt | 484 ++++ .../q23a.native_datafusion/simplified.txt | 120 + .../q23b.native_datafusion/explain.txt | 603 +++++ .../q23b.native_datafusion/simplified.txt | 151 ++ .../q24a.native_datafusion/explain.txt | 382 +++ .../q24a.native_datafusion/simplified.txt | 86 + .../q24b.native_datafusion/explain.txt | 382 +++ .../q24b.native_datafusion/simplified.txt | 86 + .../q25.native_datafusion/explain.txt | 213 ++ .../q25.native_datafusion/simplified.txt | 44 + .../q26.native_datafusion/explain.txt | 162 ++ .../q26.native_datafusion/simplified.txt | 34 + .../q27.native_datafusion/explain.txt | 162 ++ .../q27.native_datafusion/simplified.txt | 34 + .../q28.native_datafusion/explain.txt | 419 +++ .../q28.native_datafusion/simplified.txt | 111 + .../q29.native_datafusion/explain.txt | 229 ++ .../q29.native_datafusion/simplified.txt | 47 + .../q3.native_datafusion/explain.txt | 105 + .../q3.native_datafusion/simplified.txt | 23 + .../q30.native_datafusion/explain.txt | 280 ++ .../q30.native_datafusion/simplified.txt | 68 + .../q31.native_datafusion/explain.txt | 341 +++ .../q31.native_datafusion/simplified.txt | 83 + .../q32.native_datafusion/explain.txt | 193 ++ .../q32.native_datafusion/simplified.txt | 47 + .../q33.native_datafusion/explain.txt | 273 ++ .../q33.native_datafusion/simplified.txt | 64 + .../q34.native_datafusion/explain.txt | 168 ++ .../q34.native_datafusion/simplified.txt | 34 + .../q35.native_datafusion/explain.txt | 209 ++ .../q35.native_datafusion/simplified.txt | 53 + .../q36.native_datafusion/explain.txt | 156 ++ .../q36.native_datafusion/simplified.txt | 37 + .../q37.native_datafusion/explain.txt | 137 + .../q37.native_datafusion/simplified.txt | 28 + .../q38.native_datafusion/explain.txt | 143 + .../q38.native_datafusion/simplified.txt | 29 + .../q39a.native_datafusion/explain.txt | 263 ++ .../q39a.native_datafusion/simplified.txt | 60 + .../q39b.native_datafusion/explain.txt | 263 ++ .../q39b.native_datafusion/simplified.txt | 60 + .../q4.native_datafusion/explain.txt | 283 ++ .../q4.native_datafusion/simplified.txt | 68 + .../q40.native_datafusion/explain.txt | 172 ++ .../q40.native_datafusion/simplified.txt | 36 + .../q41.native_datafusion/explain.txt | 102 + .../q41.native_datafusion/simplified.txt | 21 + .../q42.native_datafusion/explain.txt | 105 + .../q42.native_datafusion/simplified.txt | 23 + .../q43.native_datafusion/explain.txt | 105 + .../q43.native_datafusion/simplified.txt | 23 + .../q44.native_datafusion/explain.txt | 277 ++ .../q44.native_datafusion/simplified.txt | 81 + .../q45.native_datafusion/explain.txt | 193 ++ .../q45.native_datafusion/simplified.txt | 43 + .../q46.native_datafusion/explain.txt | 224 ++ .../q46.native_datafusion/simplified.txt | 52 + .../q47.native_datafusion/explain.txt | 241 ++ .../q47.native_datafusion/simplified.txt | 67 + .../q48.native_datafusion/explain.txt | 152 ++ .../q48.native_datafusion/simplified.txt | 32 + .../q49.native_datafusion/explain.txt | 237 ++ .../q49.native_datafusion/simplified.txt | 71 + .../q5.native_datafusion/explain.txt | 277 ++ .../q5.native_datafusion/simplified.txt | 65 + .../q50.native_datafusion/explain.txt | 152 ++ .../q50.native_datafusion/simplified.txt | 32 + .../q51.native_datafusion/explain.txt | 145 + .../q51.native_datafusion/simplified.txt | 46 + .../q52.native_datafusion/explain.txt | 105 + .../q52.native_datafusion/simplified.txt | 23 + .../q53.native_datafusion/explain.txt | 156 ++ .../q53.native_datafusion/simplified.txt | 37 + .../q55.native_datafusion/explain.txt | 105 + .../q55.native_datafusion/simplified.txt | 23 + .../q56.native_datafusion/explain.txt | 273 ++ .../q56.native_datafusion/simplified.txt | 64 + .../q57.native_datafusion/explain.txt | 241 ++ .../q57.native_datafusion/simplified.txt | 67 + .../q59.native_datafusion/explain.txt | 232 ++ .../q59.native_datafusion/simplified.txt | 62 + .../q60.native_datafusion/explain.txt | 273 ++ .../q60.native_datafusion/simplified.txt | 64 + .../q61.native_datafusion/explain.txt | 328 +++ .../q61.native_datafusion/simplified.txt | 71 + .../q62.native_datafusion/explain.txt | 130 + .../q62.native_datafusion/simplified.txt | 28 + .../q63.native_datafusion/explain.txt | 156 ++ .../q63.native_datafusion/simplified.txt | 37 + .../q64.native_datafusion/explain.txt | 936 +++++++ .../q64.native_datafusion/simplified.txt | 251 ++ .../q65.native_datafusion/explain.txt | 234 ++ .../q65.native_datafusion/simplified.txt | 59 + .../q66.native_datafusion/explain.txt | 193 ++ .../q66.native_datafusion/simplified.txt | 45 + .../q67.native_datafusion/explain.txt | 166 ++ .../q67.native_datafusion/simplified.txt | 39 + .../q68.native_datafusion/explain.txt | 224 ++ .../q68.native_datafusion/simplified.txt | 52 + .../q69.native_datafusion/explain.txt | 209 ++ .../q69.native_datafusion/simplified.txt | 53 + .../q7.native_datafusion/explain.txt | 162 ++ .../q7.native_datafusion/simplified.txt | 34 + .../q70.native_datafusion/explain.txt | 243 ++ .../q70.native_datafusion/simplified.txt | 63 + .../q71.native_datafusion/explain.txt | 201 ++ .../q71.native_datafusion/simplified.txt | 44 + .../q72.native_datafusion/explain.txt | 323 +++ .../q72.native_datafusion/simplified.txt | 64 + .../q73.native_datafusion/explain.txt | 168 ++ .../q73.native_datafusion/simplified.txt | 34 + .../q74.native_datafusion/explain.txt | 251 ++ .../q74.native_datafusion/simplified.txt | 60 + .../q75.native_datafusion/explain.txt | 456 ++++ .../q75.native_datafusion/simplified.txt | 102 + .../q76.native_datafusion/explain.txt | 181 ++ .../q76.native_datafusion/simplified.txt | 38 + .../q77.native_datafusion/explain.txt | 291 ++ .../q77.native_datafusion/simplified.txt | 75 + .../q78.native_datafusion/explain.txt | 266 ++ .../q78.native_datafusion/simplified.txt | 65 + .../q79.native_datafusion/explain.txt | 167 ++ .../q79.native_datafusion/simplified.txt | 38 + .../q8.native_datafusion/explain.txt | 227 ++ .../q8.native_datafusion/simplified.txt | 46 + .../q80.native_datafusion/explain.txt | 374 +++ .../q80.native_datafusion/simplified.txt | 84 + .../q81.native_datafusion/explain.txt | 275 ++ .../q81.native_datafusion/simplified.txt | 67 + .../q82.native_datafusion/explain.txt | 137 + .../q82.native_datafusion/simplified.txt | 28 + .../q83.ansi.native_datafusion/explain.txt | 192 ++ .../q83.ansi.native_datafusion/simplified.txt | 43 + .../q84.native_datafusion/explain.txt | 167 ++ .../q84.native_datafusion/simplified.txt | 34 + .../q85.native_datafusion/explain.txt | 240 ++ .../q85.native_datafusion/simplified.txt | 49 + .../q86.native_datafusion/explain.txt | 125 + .../q86.native_datafusion/simplified.txt | 31 + .../q87.native_datafusion/explain.txt | 154 ++ .../q87.native_datafusion/simplified.txt | 36 + .../q88.native_datafusion/explain.txt | 873 ++++++ .../q88.native_datafusion/simplified.txt | 195 ++ .../q89.native_datafusion/explain.txt | 151 ++ .../q89.native_datafusion/simplified.txt | 36 + .../q9.native_datafusion/explain.txt | 285 ++ .../q9.native_datafusion/simplified.txt | 81 + .../q90.native_datafusion/explain.txt | 242 ++ .../q90.native_datafusion/simplified.txt | 52 + .../q91.native_datafusion/explain.txt | 223 ++ .../q91.native_datafusion/simplified.txt | 48 + .../q92.native_datafusion/explain.txt | 193 ++ .../q92.native_datafusion/simplified.txt | 47 + .../q93.native_datafusion/explain.txt | 120 + .../q93.native_datafusion/simplified.txt | 26 + .../q94.native_datafusion/explain.txt | 219 ++ .../q94.native_datafusion/simplified.txt | 44 + .../q95.native_datafusion/explain.txt | 284 ++ .../q95.native_datafusion/simplified.txt | 57 + .../q96.native_datafusion/explain.txt | 131 + .../q96.native_datafusion/simplified.txt | 27 + .../q97.native_datafusion/explain.txt | 139 + .../q97.native_datafusion/simplified.txt | 29 + .../q98.native_datafusion/explain.txt | 134 + .../q98.native_datafusion/simplified.txt | 35 + .../q99.native_datafusion/explain.txt | 130 + .../q99.native_datafusion/simplified.txt | 28 + .../q1.native_datafusion/explain.txt | 262 ++ .../q1.native_datafusion/simplified.txt | 65 + .../q1.native_iceberg_compat/explain.txt | 262 ++ .../q1.native_iceberg_compat/simplified.txt | 65 + .../q10.native_datafusion/explain.txt | 272 ++ .../q10.native_datafusion/simplified.txt | 71 + .../q10.native_iceberg_compat/explain.txt | 272 ++ .../q10.native_iceberg_compat/simplified.txt | 71 + .../q11.native_datafusion/explain.txt | 423 +++ .../q11.native_datafusion/simplified.txt | 107 + .../q11.native_iceberg_compat/explain.txt | 423 +++ .../q11.native_iceberg_compat/simplified.txt | 107 + .../q12.native_datafusion/explain.txt | 140 + .../q12.native_datafusion/simplified.txt | 38 + .../q12.native_iceberg_compat/explain.txt | 140 + .../q12.native_iceberg_compat/simplified.txt | 38 + .../q13.native_datafusion/explain.txt | 222 ++ .../q13.native_datafusion/simplified.txt | 57 + .../q13.native_iceberg_compat/explain.txt | 222 ++ .../q13.native_iceberg_compat/simplified.txt | 57 + .../q14a.native_datafusion/explain.txt | 772 ++++++ .../q14a.native_datafusion/simplified.txt | 203 ++ .../q14a.native_iceberg_compat/explain.txt | 772 ++++++ .../q14a.native_iceberg_compat/simplified.txt | 203 ++ .../q14b.native_datafusion/explain.txt | 724 +++++ .../q14b.native_datafusion/simplified.txt | 191 ++ .../q14b.native_iceberg_compat/explain.txt | 724 +++++ .../q14b.native_iceberg_compat/simplified.txt | 191 ++ .../q15.native_datafusion/explain.txt | 154 ++ .../q15.native_datafusion/simplified.txt | 39 + .../q15.native_iceberg_compat/explain.txt | 154 ++ .../q15.native_iceberg_compat/simplified.txt | 39 + .../q16.native_datafusion/explain.txt | 260 ++ .../q16.native_datafusion/simplified.txt | 74 + .../q16.native_iceberg_compat/explain.txt | 260 ++ .../q16.native_iceberg_compat/simplified.txt | 74 + .../q17.native_datafusion/explain.txt | 279 ++ .../q17.native_datafusion/simplified.txt | 71 + .../q17.native_iceberg_compat/explain.txt | 279 ++ .../q17.native_iceberg_compat/simplified.txt | 71 + .../q18.native_datafusion/explain.txt | 271 ++ .../q18.native_datafusion/simplified.txt | 69 + .../q18.native_iceberg_compat/explain.txt | 271 ++ .../q18.native_iceberg_compat/simplified.txt | 69 + .../q19.native_datafusion/explain.txt | 227 ++ .../q19.native_datafusion/simplified.txt | 58 + .../q19.native_iceberg_compat/explain.txt | 227 ++ .../q19.native_iceberg_compat/simplified.txt | 58 + .../q2.native_datafusion/explain.txt | 212 ++ .../q2.native_datafusion/simplified.txt | 59 + .../q2.native_iceberg_compat/explain.txt | 212 ++ .../q2.native_iceberg_compat/simplified.txt | 59 + .../q20.native_datafusion/explain.txt | 140 + .../q20.native_datafusion/simplified.txt | 38 + .../q20.native_iceberg_compat/explain.txt | 140 + .../q20.native_iceberg_compat/simplified.txt | 38 + .../q21.native_datafusion/explain.txt | 159 ++ .../q21.native_datafusion/simplified.txt | 40 + .../q21.native_iceberg_compat/explain.txt | 159 ++ .../q21.native_iceberg_compat/simplified.txt | 40 + .../q22.native_datafusion/explain.txt | 159 ++ .../q22.native_datafusion/simplified.txt | 40 + .../q22.native_iceberg_compat/explain.txt | 159 ++ .../q22.native_iceberg_compat/simplified.txt | 40 + .../q23a.native_datafusion/explain.txt | 547 ++++ .../q23a.native_datafusion/simplified.txt | 148 ++ .../q23a.native_iceberg_compat/explain.txt | 547 ++++ .../q23a.native_iceberg_compat/simplified.txt | 148 ++ .../q23b.native_datafusion/explain.txt | 671 +++++ .../q23b.native_datafusion/simplified.txt | 181 ++ .../q23b.native_iceberg_compat/explain.txt | 671 +++++ .../q23b.native_iceberg_compat/simplified.txt | 181 ++ .../q24a.native_datafusion/explain.txt | 427 +++ .../q24a.native_datafusion/simplified.txt | 118 + .../q24a.native_iceberg_compat/explain.txt | 427 +++ .../q24a.native_iceberg_compat/simplified.txt | 118 + .../q24b.native_datafusion/explain.txt | 427 +++ .../q24b.native_datafusion/simplified.txt | 118 + .../q24b.native_iceberg_compat/explain.txt | 427 +++ .../q24b.native_iceberg_compat/simplified.txt | 118 + .../q25.native_datafusion/explain.txt | 279 ++ .../q25.native_datafusion/simplified.txt | 71 + .../q25.native_iceberg_compat/explain.txt | 279 ++ .../q25.native_iceberg_compat/simplified.txt | 71 + .../q26.native_datafusion/explain.txt | 198 ++ .../q26.native_datafusion/simplified.txt | 50 + .../q26.native_iceberg_compat/explain.txt | 198 ++ .../q26.native_iceberg_compat/simplified.txt | 50 + .../q27.native_datafusion/explain.txt | 198 ++ .../q27.native_datafusion/simplified.txt | 50 + .../q27.native_iceberg_compat/explain.txt | 198 ++ .../q27.native_iceberg_compat/simplified.txt | 50 + .../q28.native_datafusion/explain.txt | 437 +++ .../q28.native_datafusion/simplified.txt | 111 + .../q28.native_iceberg_compat/explain.txt | 437 +++ .../q28.native_iceberg_compat/simplified.txt | 111 + .../q29.native_datafusion/explain.txt | 302 +++ .../q29.native_datafusion/simplified.txt | 77 + .../q29.native_iceberg_compat/explain.txt | 302 +++ .../q29.native_iceberg_compat/simplified.txt | 77 + .../q3.native_datafusion/explain.txt | 125 + .../q3.native_datafusion/simplified.txt | 31 + .../q3.native_iceberg_compat/explain.txt | 125 + .../q3.native_iceberg_compat/simplified.txt | 31 + .../q30.native_datafusion/explain.txt | 312 +++ .../q30.native_datafusion/simplified.txt | 78 + .../q30.native_iceberg_compat/explain.txt | 312 +++ .../q30.native_iceberg_compat/simplified.txt | 78 + .../q31.native_datafusion/explain.txt | 586 +++++ .../q31.native_datafusion/simplified.txt | 150 ++ .../q31.native_iceberg_compat/explain.txt | 586 +++++ .../q31.native_iceberg_compat/simplified.txt | 150 ++ .../q32.native_datafusion/explain.txt | 197 ++ .../q32.native_datafusion/simplified.txt | 49 + .../q32.native_iceberg_compat/explain.txt | 197 ++ .../q32.native_iceberg_compat/simplified.txt | 49 + .../q33.native_datafusion/explain.txt | 391 +++ .../q33.native_datafusion/simplified.txt | 101 + .../q33.native_iceberg_compat/explain.txt | 391 +++ .../q33.native_iceberg_compat/simplified.txt | 101 + .../q34.native_datafusion/explain.txt | 208 ++ .../q34.native_datafusion/simplified.txt | 54 + .../q34.native_iceberg_compat/explain.txt | 208 ++ .../q34.native_iceberg_compat/simplified.txt | 54 + .../q35.native_datafusion/explain.txt | 267 ++ .../q35.native_datafusion/simplified.txt | 70 + .../q35.native_iceberg_compat/explain.txt | 267 ++ .../q35.native_iceberg_compat/simplified.txt | 70 + .../q36.native_datafusion/explain.txt | 184 ++ .../q36.native_datafusion/simplified.txt | 49 + .../q36.native_iceberg_compat/explain.txt | 184 ++ .../q36.native_iceberg_compat/simplified.txt | 49 + .../q37.native_datafusion/explain.txt | 169 ++ .../q37.native_datafusion/simplified.txt | 42 + .../q37.native_iceberg_compat/explain.txt | 169 ++ .../q37.native_iceberg_compat/simplified.txt | 42 + .../q38.native_datafusion/explain.txt | 307 +++ .../q38.native_datafusion/simplified.txt | 77 + .../q38.native_iceberg_compat/explain.txt | 307 +++ .../q38.native_iceberg_compat/simplified.txt | 77 + .../q39a.native_datafusion/explain.txt | 301 +++ .../q39a.native_datafusion/simplified.txt | 77 + .../q39a.native_iceberg_compat/explain.txt | 301 +++ .../q39a.native_iceberg_compat/simplified.txt | 77 + .../q39b.native_datafusion/explain.txt | 301 +++ .../q39b.native_datafusion/simplified.txt | 77 + .../q39b.native_iceberg_compat/explain.txt | 301 +++ .../q39b.native_iceberg_compat/simplified.txt | 77 + .../q4.native_datafusion/explain.txt | 616 +++++ .../q4.native_datafusion/simplified.txt | 156 ++ .../q4.native_iceberg_compat/explain.txt | 616 +++++ .../q4.native_iceberg_compat/simplified.txt | 156 ++ .../q40.native_datafusion/explain.txt | 208 ++ .../q40.native_datafusion/simplified.txt | 58 + .../q40.native_iceberg_compat/explain.txt | 208 ++ .../q40.native_iceberg_compat/simplified.txt | 58 + .../q41.native_datafusion/explain.txt | 121 + .../q41.native_datafusion/simplified.txt | 29 + .../q41.native_iceberg_compat/explain.txt | 121 + .../q41.native_iceberg_compat/simplified.txt | 29 + .../q42.native_datafusion/explain.txt | 125 + .../q42.native_datafusion/simplified.txt | 31 + .../q42.native_iceberg_compat/explain.txt | 125 + .../q42.native_iceberg_compat/simplified.txt | 31 + .../q43.native_datafusion/explain.txt | 125 + .../q43.native_datafusion/simplified.txt | 31 + .../q43.native_iceberg_compat/explain.txt | 125 + .../q43.native_iceberg_compat/simplified.txt | 31 + .../q44.native_datafusion/explain.txt | 226 ++ .../q44.native_datafusion/simplified.txt | 63 + .../q44.native_iceberg_compat/explain.txt | 226 ++ .../q44.native_iceberg_compat/simplified.txt | 63 + .../q45.native_datafusion/explain.txt | 232 ++ .../q45.native_datafusion/simplified.txt | 59 + .../q45.native_iceberg_compat/explain.txt | 232 ++ .../q45.native_iceberg_compat/simplified.txt | 59 + .../q46.native_datafusion/explain.txt | 248 ++ .../q46.native_datafusion/simplified.txt | 63 + .../q46.native_iceberg_compat/explain.txt | 248 ++ .../q46.native_iceberg_compat/simplified.txt | 63 + .../q47.native_datafusion/explain.txt | 269 ++ .../q47.native_datafusion/simplified.txt | 79 + .../q47.native_iceberg_compat/explain.txt | 269 ++ .../q47.native_iceberg_compat/simplified.txt | 79 + .../q48.native_datafusion/explain.txt | 188 ++ .../q48.native_datafusion/simplified.txt | 48 + .../q48.native_iceberg_compat/explain.txt | 188 ++ .../q48.native_iceberg_compat/simplified.txt | 48 + .../q49.native_datafusion/explain.txt | 457 ++++ .../q49.native_datafusion/simplified.txt | 129 + .../q49.native_iceberg_compat/explain.txt | 457 ++++ .../q49.native_iceberg_compat/simplified.txt | 129 + .../q5.native_datafusion/explain.txt | 448 ++++ .../q5.native_datafusion/simplified.txt | 123 + .../q5.native_iceberg_compat/explain.txt | 448 ++++ .../q5.native_iceberg_compat/simplified.txt | 123 + .../q50.native_datafusion/explain.txt | 189 ++ .../q50.native_datafusion/simplified.txt | 48 + .../q50.native_iceberg_compat/explain.txt | 189 ++ .../q50.native_iceberg_compat/simplified.txt | 48 + .../q51.native_datafusion/explain.txt | 233 ++ .../q51.native_datafusion/simplified.txt | 72 + .../q51.native_iceberg_compat/explain.txt | 233 ++ .../q51.native_iceberg_compat/simplified.txt | 72 + .../q52.native_datafusion/explain.txt | 125 + .../q52.native_datafusion/simplified.txt | 31 + .../q52.native_iceberg_compat/explain.txt | 125 + .../q52.native_iceberg_compat/simplified.txt | 31 + .../q53.native_datafusion/explain.txt | 184 ++ .../q53.native_datafusion/simplified.txt | 49 + .../q53.native_iceberg_compat/explain.txt | 184 ++ .../q53.native_iceberg_compat/simplified.txt | 49 + .../q54.native_datafusion/explain.txt | 469 ++++ .../q54.native_datafusion/simplified.txt | 121 + .../q54.native_iceberg_compat/explain.txt | 469 ++++ .../q54.native_iceberg_compat/simplified.txt | 121 + .../q55.native_datafusion/explain.txt | 125 + .../q55.native_datafusion/simplified.txt | 31 + .../q55.native_iceberg_compat/explain.txt | 125 + .../q55.native_iceberg_compat/simplified.txt | 31 + .../q56.native_datafusion/explain.txt | 391 +++ .../q56.native_datafusion/simplified.txt | 101 + .../q56.native_iceberg_compat/explain.txt | 391 +++ .../q56.native_iceberg_compat/simplified.txt | 101 + .../q57.native_datafusion/explain.txt | 269 ++ .../q57.native_datafusion/simplified.txt | 79 + .../q57.native_iceberg_compat/explain.txt | 269 ++ .../q57.native_iceberg_compat/simplified.txt | 79 + .../q58.native_datafusion/explain.txt | 373 +++ .../q58.native_datafusion/simplified.txt | 93 + .../q58.native_iceberg_compat/explain.txt | 373 +++ .../q58.native_iceberg_compat/simplified.txt | 93 + .../q59.native_datafusion/explain.txt | 256 ++ .../q59.native_datafusion/simplified.txt | 66 + .../q59.native_iceberg_compat/explain.txt | 256 ++ .../q59.native_iceberg_compat/simplified.txt | 66 + .../q6.native_datafusion/explain.txt | 302 +++ .../q6.native_datafusion/simplified.txt | 76 + .../q6.native_iceberg_compat/explain.txt | 302 +++ .../q6.native_iceberg_compat/simplified.txt | 76 + .../q60.native_datafusion/explain.txt | 391 +++ .../q60.native_datafusion/simplified.txt | 101 + .../q60.native_iceberg_compat/explain.txt | 391 +++ .../q60.native_iceberg_compat/simplified.txt | 101 + .../q61.native_datafusion/explain.txt | 405 +++ .../q61.native_datafusion/simplified.txt | 103 + .../q61.native_iceberg_compat/explain.txt | 405 +++ .../q61.native_iceberg_compat/simplified.txt | 103 + .../q62.native_datafusion/explain.txt | 187 ++ .../q62.native_datafusion/simplified.txt | 48 + .../q62.native_iceberg_compat/explain.txt | 187 ++ .../q62.native_iceberg_compat/simplified.txt | 48 + .../q63.native_datafusion/explain.txt | 184 ++ .../q63.native_datafusion/simplified.txt | 49 + .../q63.native_iceberg_compat/explain.txt | 184 ++ .../q63.native_iceberg_compat/simplified.txt | 49 + .../q64.native_datafusion/explain.txt | 999 +++++++ .../q64.native_datafusion/simplified.txt | 270 ++ .../q64.native_iceberg_compat/explain.txt | 999 +++++++ .../q64.native_iceberg_compat/simplified.txt | 270 ++ .../q65.native_datafusion/explain.txt | 257 ++ .../q65.native_datafusion/simplified.txt | 64 + .../q65.native_iceberg_compat/explain.txt | 257 ++ .../q65.native_iceberg_compat/simplified.txt | 64 + .../q66.native_datafusion/explain.txt | 320 +++ .../q66.native_datafusion/simplified.txt | 83 + .../q66.native_iceberg_compat/explain.txt | 320 +++ .../q66.native_iceberg_compat/simplified.txt | 83 + .../q67.native_datafusion/explain.txt | 179 ++ .../q67.native_datafusion/simplified.txt | 48 + .../q67.native_iceberg_compat/explain.txt | 179 ++ .../q67.native_iceberg_compat/simplified.txt | 48 + .../q68.native_datafusion/explain.txt | 248 ++ .../q68.native_datafusion/simplified.txt | 63 + .../q68.native_iceberg_compat/explain.txt | 248 ++ .../q68.native_iceberg_compat/simplified.txt | 63 + .../q69.native_datafusion/explain.txt | 267 ++ .../q69.native_datafusion/simplified.txt | 70 + .../q69.native_iceberg_compat/explain.txt | 267 ++ .../q69.native_iceberg_compat/simplified.txt | 70 + .../q7.native_datafusion/explain.txt | 198 ++ .../q7.native_datafusion/simplified.txt | 50 + .../q7.native_iceberg_compat/explain.txt | 198 ++ .../q7.native_iceberg_compat/simplified.txt | 50 + .../q70.native_datafusion/explain.txt | 266 ++ .../q70.native_datafusion/simplified.txt | 71 + .../q70.native_iceberg_compat/explain.txt | 266 ++ .../q70.native_iceberg_compat/simplified.txt | 71 + .../q71.native_datafusion/explain.txt | 240 ++ .../q71.native_datafusion/simplified.txt | 65 + .../q71.native_iceberg_compat/explain.txt | 240 ++ .../q71.native_iceberg_compat/simplified.txt | 65 + .../q72.native_datafusion/explain.txt | 423 +++ .../q72.native_datafusion/simplified.txt | 114 + .../q72.native_iceberg_compat/explain.txt | 423 +++ .../q72.native_iceberg_compat/simplified.txt | 114 + .../q73.native_datafusion/explain.txt | 208 ++ .../q73.native_datafusion/simplified.txt | 54 + .../q73.native_iceberg_compat/explain.txt | 208 ++ .../q73.native_iceberg_compat/simplified.txt | 54 + .../q74.native_datafusion/explain.txt | 418 +++ .../q74.native_datafusion/simplified.txt | 106 + .../q74.native_iceberg_compat/explain.txt | 418 +++ .../q74.native_iceberg_compat/simplified.txt | 106 + .../q75.native_datafusion/explain.txt | 754 ++++++ .../q75.native_datafusion/simplified.txt | 232 ++ .../q75.native_iceberg_compat/explain.txt | 754 ++++++ .../q75.native_iceberg_compat/simplified.txt | 232 ++ .../q76.native_datafusion/explain.txt | 218 ++ .../q76.native_datafusion/simplified.txt | 58 + .../q76.native_iceberg_compat/explain.txt | 218 ++ .../q76.native_iceberg_compat/simplified.txt | 58 + .../q77.native_datafusion/explain.txt | 527 ++++ .../q77.native_datafusion/simplified.txt | 136 + .../q77.native_iceberg_compat/explain.txt | 527 ++++ .../q77.native_iceberg_compat/simplified.txt | 136 + .../q78.native_datafusion/explain.txt | 417 +++ .../q78.native_datafusion/simplified.txt | 123 + .../q78.native_iceberg_compat/explain.txt | 417 +++ .../q78.native_iceberg_compat/simplified.txt | 123 + .../q79.native_datafusion/explain.txt | 198 ++ .../q79.native_datafusion/simplified.txt | 50 + .../q79.native_iceberg_compat/explain.txt | 198 ++ .../q79.native_iceberg_compat/simplified.txt | 50 + .../q8.native_datafusion/explain.txt | 278 ++ .../q8.native_datafusion/simplified.txt | 70 + .../q8.native_iceberg_compat/explain.txt | 278 ++ .../q8.native_iceberg_compat/simplified.txt | 70 + .../q80.native_datafusion/explain.txt | 631 +++++ .../q80.native_datafusion/simplified.txt | 178 ++ .../q80.native_iceberg_compat/explain.txt | 631 +++++ .../q80.native_iceberg_compat/simplified.txt | 178 ++ .../q81.native_datafusion/explain.txt | 307 +++ .../q81.native_datafusion/simplified.txt | 77 + .../q81.native_iceberg_compat/explain.txt | 307 +++ .../q81.native_iceberg_compat/simplified.txt | 77 + .../q82.native_datafusion/explain.txt | 169 ++ .../q82.native_datafusion/simplified.txt | 42 + .../q82.native_iceberg_compat/explain.txt | 169 ++ .../q82.native_iceberg_compat/simplified.txt | 42 + .../q83.native_datafusion/explain.txt | 357 +++ .../q83.native_datafusion/simplified.txt | 91 + .../q83.native_iceberg_compat/explain.txt | 357 +++ .../q83.native_iceberg_compat/simplified.txt | 91 + .../q84.native_datafusion/explain.txt | 210 ++ .../q84.native_datafusion/simplified.txt | 54 + .../q84.native_iceberg_compat/explain.txt | 210 ++ .../q84.native_iceberg_compat/simplified.txt | 54 + .../q85.native_datafusion/explain.txt | 300 +++ .../q85.native_datafusion/simplified.txt | 77 + .../q85.native_iceberg_compat/explain.txt | 300 +++ .../q85.native_iceberg_compat/simplified.txt | 77 + .../q86.native_datafusion/explain.txt | 145 + .../q86.native_datafusion/simplified.txt | 39 + .../q86.native_iceberg_compat/explain.txt | 145 + .../q86.native_iceberg_compat/simplified.txt | 39 + .../q87.native_datafusion/explain.txt | 307 +++ .../q87.native_datafusion/simplified.txt | 77 + .../q87.native_iceberg_compat/explain.txt | 307 +++ .../q87.native_iceberg_compat/simplified.txt | 77 + .../q88.native_datafusion/explain.txt | 1031 ++++++++ .../q88.native_datafusion/simplified.txt | 265 ++ .../q88.native_iceberg_compat/explain.txt | 1031 ++++++++ .../q88.native_iceberg_compat/simplified.txt | 265 ++ .../q89.native_datafusion/explain.txt | 179 ++ .../q89.native_datafusion/simplified.txt | 48 + .../q89.native_iceberg_compat/explain.txt | 179 ++ .../q89.native_iceberg_compat/simplified.txt | 48 + .../q9.native_datafusion/explain.txt | 303 +++ .../q9.native_datafusion/simplified.txt | 81 + .../q9.native_iceberg_compat/explain.txt | 303 +++ .../q9.native_iceberg_compat/simplified.txt | 81 + .../q90.native_datafusion/explain.txt | 292 ++ .../q90.native_datafusion/simplified.txt | 74 + .../q90.native_iceberg_compat/explain.txt | 292 ++ .../q90.native_iceberg_compat/simplified.txt | 74 + .../q91.native_datafusion/explain.txt | 271 ++ .../q91.native_datafusion/simplified.txt | 71 + .../q91.native_iceberg_compat/explain.txt | 271 ++ .../q91.native_iceberg_compat/simplified.txt | 71 + .../q92.native_datafusion/explain.txt | 197 ++ .../q92.native_datafusion/simplified.txt | 49 + .../q92.native_iceberg_compat/explain.txt | 197 ++ .../q92.native_iceberg_compat/simplified.txt | 49 + .../q93.native_datafusion/explain.txt | 138 + .../q93.native_datafusion/simplified.txt | 40 + .../q93.native_iceberg_compat/explain.txt | 138 + .../q93.native_iceberg_compat/simplified.txt | 40 + .../q94.native_datafusion/explain.txt | 260 ++ .../q94.native_datafusion/simplified.txt | 74 + .../q94.native_iceberg_compat/explain.txt | 260 ++ .../q94.native_iceberg_compat/simplified.txt | 74 + .../q95.native_datafusion/explain.txt | 330 +++ .../q95.native_datafusion/simplified.txt | 102 + .../q95.native_iceberg_compat/explain.txt | 330 +++ .../q95.native_iceberg_compat/simplified.txt | 102 + .../q96.native_datafusion/explain.txt | 163 ++ .../q96.native_datafusion/simplified.txt | 41 + .../q96.native_iceberg_compat/explain.txt | 163 ++ .../q96.native_iceberg_compat/simplified.txt | 41 + .../q97.native_datafusion/explain.txt | 167 ++ .../q97.native_datafusion/simplified.txt | 44 + .../q97.native_iceberg_compat/explain.txt | 167 ++ .../q97.native_iceberg_compat/simplified.txt | 44 + .../q98.native_datafusion/explain.txt | 150 ++ .../q98.native_datafusion/simplified.txt | 42 + .../q98.native_iceberg_compat/explain.txt | 150 ++ .../q98.native_iceberg_compat/simplified.txt | 42 + .../q99.native_datafusion/explain.txt | 187 ++ .../q99.native_datafusion/simplified.txt | 48 + .../q99.native_iceberg_compat/explain.txt | 187 ++ .../q99.native_iceberg_compat/simplified.txt | 48 + .../q10a.native_datafusion/explain.txt | 204 ++ .../q10a.native_datafusion/simplified.txt | 41 + .../q10a.native_iceberg_compat/explain.txt | 225 ++ .../q10a.native_iceberg_compat/simplified.txt | 41 + .../q11.native_datafusion/explain.txt | 326 +++ .../q11.native_datafusion/simplified.txt | 64 + .../q11.native_iceberg_compat/explain.txt | 363 +++ .../q11.native_iceberg_compat/simplified.txt | 66 + .../q12.native_datafusion/explain.txt | 116 + .../q12.native_datafusion/simplified.txt | 26 + .../q12.native_iceberg_compat/explain.txt | 126 + .../q12.native_iceberg_compat/simplified.txt | 26 + .../q14.native_datafusion/explain.txt | 542 ++++ .../q14.native_datafusion/simplified.txt | 114 + .../q14.native_iceberg_compat/explain.txt | 639 +++++ .../q14.native_iceberg_compat/simplified.txt | 122 + .../q14a.native_datafusion/explain.txt | 603 +++++ .../q14a.native_datafusion/simplified.txt | 119 + .../q14a.native_iceberg_compat/explain.txt | 812 ++++++ .../q14a.native_iceberg_compat/simplified.txt | 149 ++ .../q18a.native_datafusion/explain.txt | 729 +++++ .../q18a.native_datafusion/simplified.txt | 143 + .../q18a.native_iceberg_compat/explain.txt | 790 ++++++ .../q18a.native_iceberg_compat/simplified.txt | 145 + .../q20.native_datafusion/explain.txt | 116 + .../q20.native_datafusion/simplified.txt | 26 + .../q20.native_iceberg_compat/explain.txt | 126 + .../q20.native_iceberg_compat/simplified.txt | 26 + .../q22.native_datafusion/explain.txt | 128 + .../q22.native_datafusion/simplified.txt | 31 + .../q22.native_iceberg_compat/explain.txt | 141 + .../q22.native_iceberg_compat/simplified.txt | 31 + .../q22a.native_datafusion/explain.txt | 250 ++ .../q22a.native_datafusion/simplified.txt | 48 + .../q22a.native_iceberg_compat/explain.txt | 263 ++ .../q22a.native_iceberg_compat/simplified.txt | 48 + .../q24.native_datafusion/explain.txt | 396 +++ .../q24.native_datafusion/simplified.txt | 91 + .../q24.native_iceberg_compat/explain.txt | 417 +++ .../q24.native_iceberg_compat/simplified.txt | 91 + .../q27a.native_datafusion/explain.txt | 360 +++ .../q27a.native_datafusion/simplified.txt | 71 + .../q27a.native_iceberg_compat/explain.txt | 390 +++ .../q27a.native_iceberg_compat/simplified.txt | 71 + .../q34.native_datafusion/explain.txt | 168 ++ .../q34.native_datafusion/simplified.txt | 34 + .../q34.native_iceberg_compat/explain.txt | 184 ++ .../q34.native_iceberg_compat/simplified.txt | 34 + .../q35.native_datafusion/explain.txt | 209 ++ .../q35.native_datafusion/simplified.txt | 53 + .../q35.native_iceberg_compat/explain.txt | 255 ++ .../q35.native_iceberg_compat/simplified.txt | 60 + .../q35a.native_datafusion/explain.txt | 199 ++ .../q35a.native_datafusion/simplified.txt | 40 + .../q35a.native_iceberg_compat/explain.txt | 220 ++ .../q35a.native_iceberg_compat/simplified.txt | 40 + .../q36a.native_datafusion/explain.txt | 224 ++ .../q36a.native_datafusion/simplified.txt | 46 + .../q36a.native_iceberg_compat/explain.txt | 237 ++ .../q36a.native_iceberg_compat/simplified.txt | 46 + .../q47.native_datafusion/explain.txt | 243 ++ .../q47.native_datafusion/simplified.txt | 63 + .../q47.native_iceberg_compat/explain.txt | 256 ++ .../q47.native_iceberg_compat/simplified.txt | 63 + .../q49.native_datafusion/explain.txt | 241 ++ .../q49.native_datafusion/simplified.txt | 69 + .../q49.native_iceberg_compat/explain.txt | 423 +++ .../q49.native_iceberg_compat/simplified.txt | 99 + .../q51a.native_datafusion/explain.txt | 259 ++ .../q51a.native_datafusion/simplified.txt | 75 + .../q51a.native_iceberg_compat/explain.txt | 398 +++ .../q51a.native_iceberg_compat/simplified.txt | 110 + .../q57.native_datafusion/explain.txt | 243 ++ .../q57.native_datafusion/simplified.txt | 63 + .../q57.native_iceberg_compat/explain.txt | 256 ++ .../q57.native_iceberg_compat/simplified.txt | 63 + .../q5a.native_datafusion/explain.txt | 362 +++ .../q5a.native_datafusion/simplified.txt | 87 + .../q5a.native_iceberg_compat/explain.txt | 494 ++++ .../q5a.native_iceberg_compat/simplified.txt | 105 + .../q6.native_datafusion/explain.txt | 243 ++ .../q6.native_datafusion/simplified.txt | 50 + .../q6.native_iceberg_compat/explain.txt | 265 ++ .../q6.native_iceberg_compat/simplified.txt | 50 + .../q64.native_datafusion/explain.txt | 837 ++++++ .../q64.native_datafusion/simplified.txt | 165 ++ .../q64.native_iceberg_compat/explain.txt | 894 +++++++ .../q64.native_iceberg_compat/simplified.txt | 167 ++ .../q67a.native_datafusion/explain.txt | 385 +++ .../q67a.native_datafusion/simplified.txt | 77 + .../q67a.native_iceberg_compat/explain.txt | 398 +++ .../q67a.native_iceberg_compat/simplified.txt | 77 + .../q70a.native_datafusion/explain.txt | 324 +++ .../q70a.native_datafusion/simplified.txt | 86 + .../q70a.native_iceberg_compat/explain.txt | 341 +++ .../q70a.native_iceberg_compat/simplified.txt | 86 + .../q72.native_datafusion/explain.txt | 323 +++ .../q72.native_datafusion/simplified.txt | 64 + .../q72.native_iceberg_compat/explain.txt | 369 +++ .../q72.native_iceberg_compat/simplified.txt | 66 + .../q74.native_datafusion/explain.txt | 232 ++ .../q74.native_datafusion/simplified.txt | 46 + .../q74.native_iceberg_compat/explain.txt | 363 +++ .../q74.native_iceberg_compat/simplified.txt | 66 + .../q75.native_datafusion/explain.txt | 443 ++++ .../q75.native_datafusion/simplified.txt | 88 + .../q75.native_iceberg_compat/explain.txt | 681 +++++ .../q75.native_iceberg_compat/simplified.txt | 126 + .../q77a.native_datafusion/explain.txt | 362 +++ .../q77a.native_datafusion/simplified.txt | 91 + .../q77a.native_iceberg_compat/explain.txt | 556 ++++ .../q77a.native_iceberg_compat/simplified.txt | 122 + .../q78.native_datafusion/explain.txt | 250 ++ .../q78.native_datafusion/simplified.txt | 50 + .../q78.native_iceberg_compat/explain.txt | 373 +++ .../q78.native_iceberg_compat/simplified.txt | 69 + .../q80a.native_datafusion/explain.txt | 459 ++++ .../q80a.native_datafusion/simplified.txt | 106 + .../q80a.native_iceberg_compat/explain.txt | 659 +++++ .../q80a.native_iceberg_compat/simplified.txt | 137 + .../q86a.native_datafusion/explain.txt | 193 ++ .../q86a.native_datafusion/simplified.txt | 40 + .../q86a.native_iceberg_compat/explain.txt | 203 ++ .../q86a.native_iceberg_compat/simplified.txt | 40 + .../q98.native_datafusion/explain.txt | 125 + .../q98.native_datafusion/simplified.txt | 30 + .../q98.native_iceberg_compat/explain.txt | 135 + .../q98.native_iceberg_compat/simplified.txt | 30 + .../q10a.native_datafusion/explain.txt | 258 ++ .../q10a.native_datafusion/simplified.txt | 68 + .../q10a.native_iceberg_compat/explain.txt | 258 ++ .../q10a.native_iceberg_compat/simplified.txt | 68 + .../q11.native_datafusion/explain.txt | 418 +++ .../q11.native_datafusion/simplified.txt | 106 + .../q11.native_iceberg_compat/explain.txt | 418 +++ .../q11.native_iceberg_compat/simplified.txt | 106 + .../q12.native_datafusion/explain.txt | 140 + .../q12.native_datafusion/simplified.txt | 38 + .../q12.native_iceberg_compat/explain.txt | 140 + .../q12.native_iceberg_compat/simplified.txt | 38 + .../q14.native_datafusion/explain.txt | 724 +++++ .../q14.native_datafusion/simplified.txt | 191 ++ .../q14.native_iceberg_compat/explain.txt | 724 +++++ .../q14.native_iceberg_compat/simplified.txt | 191 ++ .../q14a.native_datafusion/explain.txt | 931 +++++++ .../q14a.native_datafusion/simplified.txt | 249 ++ .../q14a.native_iceberg_compat/explain.txt | 931 +++++++ .../q14a.native_iceberg_compat/simplified.txt | 249 ++ .../q18a.native_datafusion/explain.txt | 891 +++++++ .../q18a.native_datafusion/simplified.txt | 227 ++ .../q18a.native_iceberg_compat/explain.txt | 891 +++++++ .../q18a.native_iceberg_compat/simplified.txt | 227 ++ .../q20.native_datafusion/explain.txt | 140 + .../q20.native_datafusion/simplified.txt | 38 + .../q20.native_iceberg_compat/explain.txt | 140 + .../q20.native_iceberg_compat/simplified.txt | 38 + .../q22.native_datafusion/explain.txt | 151 ++ .../q22.native_datafusion/simplified.txt | 39 + .../q22.native_iceberg_compat/explain.txt | 151 ++ .../q22.native_iceberg_compat/simplified.txt | 39 + .../q22a.native_datafusion/explain.txt | 305 +++ .../q22a.native_datafusion/simplified.txt | 78 + .../q22a.native_iceberg_compat/explain.txt | 305 +++ .../q22a.native_iceberg_compat/simplified.txt | 78 + .../q24.native_datafusion/explain.txt | 437 +++ .../q24.native_datafusion/simplified.txt | 122 + .../q24.native_iceberg_compat/explain.txt | 437 +++ .../q24.native_iceberg_compat/simplified.txt | 122 + .../q27a.native_datafusion/explain.txt | 443 ++++ .../q27a.native_datafusion/simplified.txt | 113 + .../q27a.native_iceberg_compat/explain.txt | 443 ++++ .../q27a.native_iceberg_compat/simplified.txt | 113 + .../q34.native_datafusion/explain.txt | 208 ++ .../q34.native_datafusion/simplified.txt | 54 + .../q34.native_iceberg_compat/explain.txt | 208 ++ .../q34.native_iceberg_compat/simplified.txt | 54 + .../q35.native_datafusion/explain.txt | 267 ++ .../q35.native_datafusion/simplified.txt | 70 + .../q35.native_iceberg_compat/explain.txt | 267 ++ .../q35.native_iceberg_compat/simplified.txt | 70 + .../q35a.native_datafusion/explain.txt | 253 ++ .../q35a.native_datafusion/simplified.txt | 67 + .../q35a.native_iceberg_compat/explain.txt | 253 ++ .../q35a.native_iceberg_compat/simplified.txt | 67 + .../q36a.native_datafusion/explain.txt | 269 ++ .../q36a.native_datafusion/simplified.txt | 74 + .../q36a.native_iceberg_compat/explain.txt | 269 ++ .../q36a.native_iceberg_compat/simplified.txt | 74 + .../q47.native_datafusion/explain.txt | 269 ++ .../q47.native_datafusion/simplified.txt | 79 + .../q47.native_iceberg_compat/explain.txt | 269 ++ .../q47.native_iceberg_compat/simplified.txt | 79 + .../q49.native_datafusion/explain.txt | 457 ++++ .../q49.native_datafusion/simplified.txt | 129 + .../q49.native_iceberg_compat/explain.txt | 457 ++++ .../q49.native_iceberg_compat/simplified.txt | 129 + .../q51a.native_datafusion/explain.txt | 404 +++ .../q51a.native_datafusion/simplified.txt | 121 + .../q51a.native_iceberg_compat/explain.txt | 404 +++ .../q51a.native_iceberg_compat/simplified.txt | 121 + .../q57.native_datafusion/explain.txt | 269 ++ .../q57.native_datafusion/simplified.txt | 79 + .../q57.native_iceberg_compat/explain.txt | 269 ++ .../q57.native_iceberg_compat/simplified.txt | 79 + .../q5a.native_datafusion/explain.txt | 533 ++++ .../q5a.native_datafusion/simplified.txt | 148 ++ .../q5a.native_iceberg_compat/explain.txt | 533 ++++ .../q5a.native_iceberg_compat/simplified.txt | 148 ++ .../q6.native_datafusion/explain.txt | 302 +++ .../q6.native_datafusion/simplified.txt | 76 + .../q6.native_iceberg_compat/explain.txt | 302 +++ .../q6.native_iceberg_compat/simplified.txt | 76 + .../q64.native_datafusion/explain.txt | 999 +++++++ .../q64.native_datafusion/simplified.txt | 270 ++ .../q64.native_iceberg_compat/explain.txt | 999 +++++++ .../q64.native_iceberg_compat/simplified.txt | 270 ++ .../q67a.native_datafusion/explain.txt | 441 ++++ .../q67a.native_datafusion/simplified.txt | 120 + .../q67a.native_iceberg_compat/explain.txt | 441 ++++ .../q67a.native_iceberg_compat/simplified.txt | 120 + .../q70a.native_datafusion/explain.txt | 351 +++ .../q70a.native_datafusion/simplified.txt | 96 + .../q70a.native_iceberg_compat/explain.txt | 351 +++ .../q70a.native_iceberg_compat/simplified.txt | 96 + .../q72.native_datafusion/explain.txt | 423 +++ .../q72.native_datafusion/simplified.txt | 114 + .../q72.native_iceberg_compat/explain.txt | 423 +++ .../q72.native_iceberg_compat/simplified.txt | 114 + .../q74.native_datafusion/explain.txt | 418 +++ .../q74.native_datafusion/simplified.txt | 106 + .../q74.native_iceberg_compat/explain.txt | 418 +++ .../q74.native_iceberg_compat/simplified.txt | 106 + .../q75.native_datafusion/explain.txt | 754 ++++++ .../q75.native_datafusion/simplified.txt | 232 ++ .../q75.native_iceberg_compat/explain.txt | 754 ++++++ .../q75.native_iceberg_compat/simplified.txt | 232 ++ .../q77a.native_datafusion/explain.txt | 612 +++++ .../q77a.native_datafusion/simplified.txt | 161 ++ .../q77a.native_iceberg_compat/explain.txt | 612 +++++ .../q77a.native_iceberg_compat/simplified.txt | 161 ++ .../q78.native_datafusion/explain.txt | 417 +++ .../q78.native_datafusion/simplified.txt | 123 + .../q78.native_iceberg_compat/explain.txt | 417 +++ .../q78.native_iceberg_compat/simplified.txt | 123 + .../q80a.native_datafusion/explain.txt | 716 +++++ .../q80a.native_datafusion/simplified.txt | 203 ++ .../q80a.native_iceberg_compat/explain.txt | 716 +++++ .../q80a.native_iceberg_compat/simplified.txt | 203 ++ .../q86a.native_datafusion/explain.txt | 230 ++ .../q86a.native_datafusion/simplified.txt | 64 + .../q86a.native_iceberg_compat/explain.txt | 230 ++ .../q86a.native_iceberg_compat/simplified.txt | 64 + .../q98.native_datafusion/explain.txt | 145 + .../q98.native_datafusion/simplified.txt | 41 + .../q98.native_iceberg_compat/explain.txt | 145 + .../q98.native_iceberg_compat/simplified.txt | 41 + .../apache/comet/CometExpressionSuite.scala | 141 +- .../apache/comet/exec/CometExecSuite.scala | 10 +- .../comet/parquet/ParquetReadSuite.scala | 37 +- .../spark/sql/CometTPCDSQuerySuite.scala | 1 + .../spark/sql/CometTPCHQuerySuite.scala | 1 + .../org/apache/spark/sql/CometTestBase.scala | 5 +- .../sql/comet/CometPlanStabilitySuite.scala | 34 +- 1324 files changed, 239269 insertions(+), 108 deletions(-) create mode 100644 common/src/main/java/org/apache/comet/parquet/NativeBatchReader.java create mode 100644 common/src/main/java/org/apache/comet/parquet/NativeColumnReader.java create mode 100644 common/src/main/scala/org/apache/spark/sql/comet/CometArrowUtils.scala create mode 100644 native/core/src/parquet/parquet_support.rs create mode 100644 native/core/src/parquet/schema_adapter.rs create mode 100644 native/spark-expr/src/avg.rs create mode 100644 native/spark-expr/src/avg_decimal.rs create mode 100644 native/spark-expr/src/covariance.rs create mode 100644 native/spark-expr/src/strings.rs create mode 100644 native/spark-expr/src/sum_decimal.rs create mode 100644 native/spark-expr/src/temporal.rs create mode 100644 native/spark-expr/src/variance.rs create mode 100644 spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala create mode 100644 spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/simplified.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/explain.txt create mode 100644 spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/simplified.txt diff --git a/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java b/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java index 099c7b9733..ef97abf74c 100644 --- a/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java +++ b/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java @@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory; import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.schema.Type; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.TimestampNTZType$; @@ -36,6 +37,9 @@ public abstract class AbstractColumnReader implements AutoCloseable { /** The Spark data type. */ protected final DataType type; + /** The Spark data type. */ + protected final Type fieldType; + /** Parquet column descriptor. */ protected final ColumnDescriptor descriptor; @@ -61,13 +65,23 @@ public abstract class AbstractColumnReader implements AutoCloseable { public AbstractColumnReader( DataType type, + Type fieldType, ColumnDescriptor descriptor, boolean useDecimal128, boolean useLegacyDateTimestamp) { this.type = type; + this.fieldType = fieldType; this.descriptor = descriptor; this.useDecimal128 = useDecimal128; this.useLegacyDateTimestamp = useLegacyDateTimestamp; + } + + public AbstractColumnReader( + DataType type, + ColumnDescriptor descriptor, + boolean useDecimal128, + boolean useLegacyDateTimestamp) { + this(type, null, descriptor, useDecimal128, useLegacyDateTimestamp); TypeUtil.checkParquetType(descriptor, type); } diff --git a/common/src/main/java/org/apache/comet/parquet/BatchReader.java b/common/src/main/java/org/apache/comet/parquet/BatchReader.java index 11c6d14dca..675dae9e78 100644 --- a/common/src/main/java/org/apache/comet/parquet/BatchReader.java +++ b/common/src/main/java/org/apache/comet/parquet/BatchReader.java @@ -272,7 +272,7 @@ public void init() throws URISyntaxException, IOException { requestedSchema = CometParquetReadSupport.clipParquetSchema( requestedSchema, sparkSchema, isCaseSensitive, useFieldId, ignoreMissingIds); - if (requestedSchema.getColumns().size() != sparkSchema.size()) { + if (requestedSchema.getFieldCount() != sparkSchema.size()) { throw new IllegalArgumentException( String.format( "Spark schema has %d columns while " + "Parquet schema has %d columns", diff --git a/common/src/main/java/org/apache/comet/parquet/Native.java b/common/src/main/java/org/apache/comet/parquet/Native.java index 1e666652e3..d0b0f951db 100644 --- a/common/src/main/java/org/apache/comet/parquet/Native.java +++ b/common/src/main/java/org/apache/comet/parquet/Native.java @@ -234,4 +234,56 @@ public static native void setPageV2( * @param handle the handle to the native Parquet column reader */ public static native void closeColumnReader(long handle); + + ///////////// Arrow Native Parquet Reader APIs + // TODO: Add partitionValues(?), improve requiredColumns to use a projection mask that corresponds + // to arrow. + // Add batch size, datetimeRebaseModeSpec, metrics(how?)... + + /** + * Initialize a record batch reader for a PartitionedFile + * + * @param filePath + * @param start + * @param length + * @return a handle to the record batch reader, used in subsequent calls. + */ + public static native long initRecordBatchReader( + String filePath, + long fileSize, + long start, + long length, + byte[] requiredSchema, + String sessionTimezone); + + // arrow native version of read batch + /** + * Read the next batch of data into memory on native side + * + * @param handle + * @return the number of rows read + */ + public static native int readNextRecordBatch(long handle); + + // arrow native equivalent of currentBatch. 'columnNum' is number of the column in the record + // batch + /** + * Load the column corresponding to columnNum in the currently loaded record batch into JVM + * + * @param handle + * @param columnNum + * @param arrayAddr + * @param schemaAddr + */ + public static native void currentColumnBatch( + long handle, int columnNum, long arrayAddr, long schemaAddr); + + // arrow native version to close record batch reader + + /** + * Close the record batch reader. Free the resources + * + * @param handle + */ + public static native void closeRecordBatchReader(long handle); } diff --git a/common/src/main/java/org/apache/comet/parquet/NativeBatchReader.java b/common/src/main/java/org/apache/comet/parquet/NativeBatchReader.java new file mode 100644 index 0000000000..9d79b707a5 --- /dev/null +++ b/common/src/main/java/org/apache/comet/parquet/NativeBatchReader.java @@ -0,0 +1,523 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.parquet; + +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.URISyntaxException; +import java.nio.channels.Channels; +import java.util.*; + +import scala.Option; +import scala.collection.Seq; +import scala.collection.mutable.Buffer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.arrow.c.CometSchemaImporter; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.WriteChannel; +import org.apache.arrow.vector.ipc.message.MessageSerializer; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.parquet.HadoopReadOptions; +import org.apache.parquet.ParquetReadOptions; +import org.apache.parquet.Preconditions; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.Type; +import org.apache.spark.TaskContext; +import org.apache.spark.TaskContext$; +import org.apache.spark.executor.TaskMetrics; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.comet.CometArrowUtils; +import org.apache.spark.sql.comet.parquet.CometParquetReadSupport; +import org.apache.spark.sql.execution.datasources.PartitionedFile; +import org.apache.spark.sql.execution.datasources.parquet.ParquetToSparkSchemaConverter; +import org.apache.spark.sql.execution.metric.SQLMetric; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.apache.spark.util.AccumulatorV2; + +import org.apache.comet.CometConf; +import org.apache.comet.shims.ShimBatchReader; +import org.apache.comet.shims.ShimFileFormat; +import org.apache.comet.vector.CometVector; +import org.apache.comet.vector.NativeUtil; + +/** + * A vectorized Parquet reader that reads a Parquet file in a batched fashion. + * + *

Example of how to use this: + * + *

+ *   BatchReader reader = new BatchReader(parquetFile, batchSize);
+ *   try {
+ *     reader.init();
+ *     while (reader.readBatch()) {
+ *       ColumnarBatch batch = reader.currentBatch();
+ *       // consume the batch
+ *     }
+ *   } finally { // resources associated with the reader should be released
+ *     reader.close();
+ *   }
+ * 
+ */ +public class NativeBatchReader extends RecordReader implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(NativeBatchReader.class); + protected static final BufferAllocator ALLOCATOR = new RootAllocator(); + private NativeUtil nativeUtil = new NativeUtil(); + + private Configuration conf; + private int capacity; + private boolean isCaseSensitive; + private boolean useFieldId; + private boolean ignoreMissingIds; + private StructType partitionSchema; + private InternalRow partitionValues; + private PartitionedFile file; + private final Map metrics; + + private StructType sparkSchema; + private MessageType requestedSchema; + private CometVector[] vectors; + private AbstractColumnReader[] columnReaders; + private CometSchemaImporter importer; + private ColumnarBatch currentBatch; + // private FileReader fileReader; + private boolean[] missingColumns; + private boolean isInitialized; + private ParquetMetadata footer; + + /** + * Whether the native scan should always return decimal represented by 128 bits, regardless of its + * precision. Normally, this should be true if native execution is enabled, since Arrow compute + * kernels doesn't support 32 and 64 bit decimals yet. + */ + // TODO: (ARROW NATIVE) + private boolean useDecimal128; + + /** + * Whether to return dates/timestamps that were written with legacy hybrid (Julian + Gregorian) + * calendar as it is. If this is true, Comet will return them as it is, instead of rebasing them + * to the new Proleptic Gregorian calendar. If this is false, Comet will throw exceptions when + * seeing these dates/timestamps. + */ + // TODO: (ARROW NATIVE) + private boolean useLegacyDateTimestamp; + + /** The TaskContext object for executing this task. */ + private final TaskContext taskContext; + + private long handle; + + // Only for testing + public NativeBatchReader(String file, int capacity) { + this(file, capacity, null, null); + } + + // Only for testing + public NativeBatchReader( + String file, int capacity, StructType partitionSchema, InternalRow partitionValues) { + this(new Configuration(), file, capacity, partitionSchema, partitionValues); + } + + // Only for testing + public NativeBatchReader( + Configuration conf, + String file, + int capacity, + StructType partitionSchema, + InternalRow partitionValues) { + + this.conf = conf; + this.capacity = capacity; + this.isCaseSensitive = false; + this.useFieldId = false; + this.ignoreMissingIds = false; + this.partitionSchema = partitionSchema; + this.partitionValues = partitionValues; + + this.file = ShimBatchReader.newPartitionedFile(partitionValues, file); + this.metrics = new HashMap<>(); + + this.taskContext = TaskContext$.MODULE$.get(); + } + + public NativeBatchReader(AbstractColumnReader[] columnReaders) { + // Todo: set useDecimal128 and useLazyMaterialization + int numColumns = columnReaders.length; + this.columnReaders = new AbstractColumnReader[numColumns]; + vectors = new CometVector[numColumns]; + currentBatch = new ColumnarBatch(vectors); + // This constructor is used by Iceberg only. The columnReaders are + // initialized in Iceberg, so no need to call the init() + isInitialized = true; + this.taskContext = TaskContext$.MODULE$.get(); + this.metrics = new HashMap<>(); + } + + NativeBatchReader( + Configuration conf, + PartitionedFile inputSplit, + ParquetMetadata footer, + int capacity, + StructType sparkSchema, + boolean isCaseSensitive, + boolean useFieldId, + boolean ignoreMissingIds, + boolean useLegacyDateTimestamp, + StructType partitionSchema, + InternalRow partitionValues, + Map metrics) { + this.conf = conf; + this.capacity = capacity; + this.sparkSchema = sparkSchema; + this.isCaseSensitive = isCaseSensitive; + this.useFieldId = useFieldId; + this.ignoreMissingIds = ignoreMissingIds; + this.useLegacyDateTimestamp = useLegacyDateTimestamp; + this.partitionSchema = partitionSchema; + this.partitionValues = partitionValues; + this.file = inputSplit; + this.footer = footer; + this.metrics = metrics; + this.taskContext = TaskContext$.MODULE$.get(); + } + + /** + * Initialize this reader. The reason we don't do it in the constructor is that we want to close + * any resource hold by this reader when error happens during the initialization. + */ + public void init() throws URISyntaxException, IOException { + + useDecimal128 = + conf.getBoolean( + CometConf.COMET_USE_DECIMAL_128().key(), + (Boolean) CometConf.COMET_USE_DECIMAL_128().defaultValue().get()); + + long start = file.start(); + long length = file.length(); + String filePath = file.filePath().toString(); + long fileSize = file.fileSize(); + + requestedSchema = footer.getFileMetaData().getSchema(); + MessageType fileSchema = requestedSchema; + // TODO: (ARROW NATIVE) Get requested schema - Convert the Spark schema (from catalyst) into a + // list of fields to project (?). Fields must be matched by field id first and then by name + { //////// Get requested Schema - replace this block of code native (avoid reading the footer + ParquetReadOptions.Builder builder = HadoopReadOptions.builder(conf, new Path(filePath)); + + if (start >= 0 && length >= 0) { + builder = builder.withRange(start, start + length); + } + ParquetReadOptions readOptions = builder.build(); + + ReadOptions cometReadOptions = ReadOptions.builder(conf).build(); + + if (sparkSchema == null) { + sparkSchema = new ParquetToSparkSchemaConverter(conf).convert(requestedSchema); + } else { + requestedSchema = + CometParquetReadSupport.clipParquetSchema( + requestedSchema, sparkSchema, isCaseSensitive, useFieldId, ignoreMissingIds); + if (requestedSchema.getFieldCount() != sparkSchema.size()) { + throw new IllegalArgumentException( + String.format( + "Spark schema has %d columns while " + "Parquet schema has %d columns", + sparkSchema.size(), requestedSchema.getColumns().size())); + } + } + } ////// End get requested schema + + String timeZoneId = conf.get("spark.sql.session.timeZone"); + Schema arrowSchema = CometArrowUtils.toArrowSchema(sparkSchema, timeZoneId); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + WriteChannel writeChannel = new WriteChannel(Channels.newChannel(out)); + MessageSerializer.serialize(writeChannel, arrowSchema); + byte[] serializedRequestedArrowSchema = out.toByteArray(); + + //// Create Column readers + List columns = requestedSchema.getColumns(); + List fields = requestedSchema.getFields(); + int numColumns = fields.size(); + if (partitionSchema != null) numColumns += partitionSchema.size(); + columnReaders = new AbstractColumnReader[numColumns]; + + // Initialize missing columns and use null vectors for them + missingColumns = new boolean[columns.size()]; + List paths = requestedSchema.getPaths(); + StructField[] nonPartitionFields = sparkSchema.fields(); + // ShimFileFormat.findRowIndexColumnIndexInSchema(sparkSchema); + for (int i = 0; i < requestedSchema.getFieldCount(); i++) { + Type t = requestedSchema.getFields().get(i); + // Preconditions.checkState( + // t.isPrimitive() && !t.isRepetition(Type.Repetition.REPEATED), + // "Complex type is not supported"); + String[] colPath = paths.get(i); + if (nonPartitionFields[i].name().equals(ShimFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME())) { + // Values of ROW_INDEX_TEMPORARY_COLUMN_NAME column are always populated with + // generated row indexes, rather than read from the file. + // TODO(SPARK-40059): Allow users to include columns named + // FileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME in their schemas. + // TODO: (ARROW NATIVE) Support row indices ... + // long[] rowIndices = fileReader.getRowIndices(); + // columnReaders[i] = new RowIndexColumnReader(nonPartitionFields[i], capacity, + // rowIndices); + missingColumns[i] = true; + } else if (fileSchema.containsPath(colPath)) { + ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); + if (!fd.equals(columns.get(i))) { + throw new UnsupportedOperationException("Schema evolution is not supported"); + } + missingColumns[i] = false; + } else { + if (columns.get(i).getMaxDefinitionLevel() == 0) { + throw new IOException( + "Required column '" + + Arrays.toString(colPath) + + "' is missing" + + " in data file " + + filePath); + } + ConstantColumnReader reader = + new ConstantColumnReader(nonPartitionFields[i], capacity, useDecimal128); + columnReaders[i] = reader; + missingColumns[i] = true; + } + } + + // Initialize constant readers for partition columns + if (partitionSchema != null) { + StructField[] partitionFields = partitionSchema.fields(); + for (int i = columns.size(); i < columnReaders.length; i++) { + int fieldIndex = i - columns.size(); + StructField field = partitionFields[fieldIndex]; + ConstantColumnReader reader = + new ConstantColumnReader(field, capacity, partitionValues, fieldIndex, useDecimal128); + columnReaders[i] = reader; + } + } + + vectors = new CometVector[numColumns]; + currentBatch = new ColumnarBatch(vectors); + + // For test purpose only + // If the last external accumulator is `NumRowGroupsAccumulator`, the row group number to read + // will be updated to the accumulator. So we can check if the row groups are filtered or not + // in test case. + // Note that this tries to get thread local TaskContext object, if this is called at other + // thread, it won't update the accumulator. + if (taskContext != null) { + Option> accu = getTaskAccumulator(taskContext.taskMetrics()); + if (accu.isDefined() && accu.get().getClass().getSimpleName().equals("NumRowGroupsAcc")) { + @SuppressWarnings("unchecked") + AccumulatorV2 intAccum = (AccumulatorV2) accu.get(); + // TODO: Get num_row_groups from native + // intAccum.add(fileReader.getRowGroups().size()); + } + } + + this.handle = + Native.initRecordBatchReader( + filePath, fileSize, start, length, serializedRequestedArrowSchema, timeZoneId); + isInitialized = true; + } + + public void setSparkSchema(StructType schema) { + this.sparkSchema = schema; + } + + public AbstractColumnReader[] getColumnReaders() { + return columnReaders; + } + + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) + throws IOException, InterruptedException { + // Do nothing. The initialization work is done in 'init' already. + } + + @Override + public boolean nextKeyValue() throws IOException { + return nextBatch(); + } + + @Override + public Void getCurrentKey() { + return null; + } + + @Override + public ColumnarBatch getCurrentValue() { + return currentBatch(); + } + + @Override + public float getProgress() { + return 0; + } + + /** + * Returns the current columnar batch being read. + * + *

Note that this must be called AFTER {@link NativeBatchReader#nextBatch()}. + */ + public ColumnarBatch currentBatch() { + return currentBatch; + } + + /** + * Loads the next batch of rows. This is called by Spark _and_ Iceberg + * + * @return true if there are no more rows to read, false otherwise. + */ + public boolean nextBatch() throws IOException { + Preconditions.checkState(isInitialized, "init() should be called first!"); + + // if (rowsRead >= totalRowCount) return false; + int batchSize; + + try { + batchSize = loadNextBatch(); + } catch (RuntimeException e) { + // Spark will check certain exception e.g. `SchemaColumnConvertNotSupportedException`. + throw e; + } catch (Throwable e) { + throw new IOException(e); + } + + if (batchSize == 0) return false; + + long totalDecodeTime = 0, totalLoadTime = 0; + for (int i = 0; i < columnReaders.length; i++) { + AbstractColumnReader reader = columnReaders[i]; + long startNs = System.nanoTime(); + // TODO: read from native reader + reader.readBatch(batchSize); + // totalDecodeTime += System.nanoTime() - startNs; + // startNs = System.nanoTime(); + vectors[i] = reader.currentBatch(); + totalLoadTime += System.nanoTime() - startNs; + } + + // TODO: (ARROW NATIVE) Add Metrics + // SQLMetric decodeMetric = metrics.get("ParquetNativeDecodeTime"); + // if (decodeMetric != null) { + // decodeMetric.add(totalDecodeTime); + // } + SQLMetric loadMetric = metrics.get("ParquetNativeLoadTime"); + if (loadMetric != null) { + loadMetric.add(totalLoadTime); + } + + currentBatch.setNumRows(batchSize); + return true; + } + + @Override + public void close() throws IOException { + if (columnReaders != null) { + for (AbstractColumnReader reader : columnReaders) { + if (reader != null) { + reader.close(); + } + } + } + if (importer != null) { + importer.close(); + importer = null; + } + nativeUtil.close(); + Native.closeRecordBatchReader(this.handle); + } + + @SuppressWarnings("deprecation") + private int loadNextBatch() throws Throwable { + long startNs = System.nanoTime(); + + int batchSize = Native.readNextRecordBatch(this.handle); + if (batchSize == 0) { + return batchSize; + } + if (importer != null) importer.close(); + importer = new CometSchemaImporter(ALLOCATOR); + + List columns = requestedSchema.getColumns(); + List fields = requestedSchema.getFields(); + for (int i = 0; i < fields.size(); i++) { + // TODO: (ARROW NATIVE) check this. Currently not handling missing columns correctly? + if (missingColumns[i]) continue; + if (columnReaders[i] != null) columnReaders[i].close(); + // TODO: (ARROW NATIVE) handle tz, datetime & int96 rebase + DataType dataType = sparkSchema.fields()[i].dataType(); + Type field = fields.get(i); + NativeColumnReader reader = + new NativeColumnReader( + this.handle, + i, + dataType, + field, + null, + importer, + nativeUtil, + capacity, + useDecimal128, + useLegacyDateTimestamp); + columnReaders[i] = reader; + } + return batchSize; + } + + // Signature of externalAccums changed from returning a Buffer to returning a Seq. If comet is + // expecting a Buffer but the Spark version returns a Seq or vice versa, we get a + // method not found exception. + @SuppressWarnings("unchecked") + private Option> getTaskAccumulator(TaskMetrics taskMetrics) { + Method externalAccumsMethod; + try { + externalAccumsMethod = TaskMetrics.class.getDeclaredMethod("externalAccums"); + externalAccumsMethod.setAccessible(true); + String returnType = externalAccumsMethod.getReturnType().getName(); + if (returnType.equals("scala.collection.mutable.Buffer")) { + return ((Buffer>) externalAccumsMethod.invoke(taskMetrics)) + .lastOption(); + } else if (returnType.equals("scala.collection.Seq")) { + return ((Seq>) externalAccumsMethod.invoke(taskMetrics)).lastOption(); + } else { + return Option.apply(null); // None + } + } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { + return Option.apply(null); // None + } + } +} diff --git a/common/src/main/java/org/apache/comet/parquet/NativeColumnReader.java b/common/src/main/java/org/apache/comet/parquet/NativeColumnReader.java new file mode 100644 index 0000000000..c358999a54 --- /dev/null +++ b/common/src/main/java/org/apache/comet/parquet/NativeColumnReader.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet.parquet; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.arrow.c.ArrowArray; +import org.apache.arrow.c.ArrowSchema; +import org.apache.arrow.c.CometSchemaImporter; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.schema.Type; +import org.apache.spark.sql.types.DataType; + +import org.apache.comet.vector.*; + +// TODO: extend ColumnReader instead of AbstractColumnReader to reduce code duplication +public class NativeColumnReader extends AbstractColumnReader { + protected static final Logger LOG = LoggerFactory.getLogger(NativeColumnReader.class); + protected final BufferAllocator ALLOCATOR = new RootAllocator(); + + /** + * The current Comet vector holding all the values read by this column reader. Owned by this + * reader and MUST be closed after use. + */ + private CometDecodedVector currentVector; + + /** Dictionary values for this column. Only set if the column is using dictionary encoding. */ + protected CometDictionary dictionary; + + /** + * The number of values in the current batch, used when we are skipping importing of Arrow + * vectors, in which case we'll simply update the null count of the existing vectors. + */ + int currentNumValues; + + /** + * Whether the last loaded vector contains any null value. This is used to determine if we can + * skip vector reloading. If the flag is false, Arrow C API will skip to import the validity + * buffer, and therefore we cannot skip vector reloading. + */ + boolean hadNull; + + private final CometSchemaImporter importer; + private final NativeUtil nativeUtil; + + private ArrowArray array = null; + private ArrowSchema schema = null; + + private long nativeBatchHandle = 0xDEADBEEFL; + private final int columnNum; + + public NativeColumnReader( + long nativeBatchHandle, + int columnNum, + DataType type, + Type fieldType, + ColumnDescriptor descriptor, + CometSchemaImporter importer, + NativeUtil nativeUtil, + int batchSize, + boolean useDecimal128, + boolean useLegacyDateTimestamp) { + super(type, fieldType, descriptor, useDecimal128, useLegacyDateTimestamp); + assert batchSize > 0 : "Batch size must be positive, found " + batchSize; + this.batchSize = batchSize; + this.nativeUtil = nativeUtil; + this.importer = importer; + this.nativeBatchHandle = nativeBatchHandle; + this.columnNum = columnNum; + initNative(); + } + + @Override + // Override in order to avoid creation of JVM side column readers + protected void initNative() { + LOG.debug( + "Native column reader {} is initialized", String.join(".", this.type.catalogString())); + nativeHandle = 0; + } + + @Override + public void readBatch(int total) { + LOG.debug("Reading column batch of size = {}", total); + + this.currentNumValues = total; + } + + /** Returns the {@link CometVector} read by this reader. */ + @Override + public CometVector currentBatch() { + return loadVector(); + } + + @Override + public void close() { + if (currentVector != null) { + currentVector.close(); + currentVector = null; + } + super.close(); + } + + /** Returns a decoded {@link CometDecodedVector Comet vector}. */ + public CometDecodedVector loadVector() { + + LOG.debug("Loading vector for next batch"); + + // Close the previous vector first to release struct memory allocated to import Arrow array & + // schema from native side, through the C data interface + if (currentVector != null) { + currentVector.close(); + } + + // TODO: ARROW NATIVE : Handle Uuid? + + array = ArrowArray.allocateNew(ALLOCATOR); + schema = ArrowSchema.allocateNew(ALLOCATOR); + + long arrayAddr = array.memoryAddress(); + long schemaAddr = schema.memoryAddress(); + + Native.currentColumnBatch(nativeBatchHandle, columnNum, arrayAddr, schemaAddr); + + ArrowArray[] arrays = {array}; + ArrowSchema[] schemas = {schema}; + + CometDecodedVector cometVector = + (CometDecodedVector) + scala.collection.JavaConverters.seqAsJavaList(nativeUtil.importVector(arrays, schemas)) + .get(0); + + // Update whether the current vector contains any null values. This is used in the following + // batch(s) to determine whether we can skip loading the native vector. + hadNull = cometVector.hasNull(); + + currentVector = cometVector; + return currentVector; + } +} diff --git a/common/src/main/java/org/apache/comet/vector/CometVector.java b/common/src/main/java/org/apache/comet/vector/CometVector.java index 3b0ca35bf9..6be8b28669 100644 --- a/common/src/main/java/org/apache/comet/vector/CometVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometVector.java @@ -232,7 +232,7 @@ public DictionaryProvider getDictionaryProvider() { * @param useDecimal128 Whether to use Decimal128 for decimal column * @return `CometVector` implementation */ - protected static CometVector getVector( + public static CometVector getVector( ValueVector vector, boolean useDecimal128, DictionaryProvider dictionaryProvider) { if (vector instanceof StructVector) { return new CometStructVector(vector, useDecimal128, dictionaryProvider); diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala index 12e6f971f7..2e64403c64 100644 --- a/common/src/main/scala/org/apache/comet/CometConf.scala +++ b/common/src/main/scala/org/apache/comet/CometConf.scala @@ -77,6 +77,27 @@ object CometConf extends ShimCometConf { .booleanConf .createWithDefault(true) + val SCAN_NATIVE_COMET = "native_comet" + val SCAN_NATIVE_DATAFUSION = "native_datafusion" + val SCAN_NATIVE_ICEBERG_COMPAT = "native_iceberg_compat" + + val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl") + .doc( + s"The implementation of Comet Native Scan to use. Available modes are '$SCAN_NATIVE_COMET'," + + s"'$SCAN_NATIVE_DATAFUSION', and '$SCAN_NATIVE_ICEBERG_COMPAT'. " + + s"'$SCAN_NATIVE_COMET' is for the original Comet native scan which uses a jvm based " + + "parquet file reader and native column decoding. Supports simple types only " + + s"'$SCAN_NATIVE_DATAFUSION' is a fully native implementation of scan based on DataFusion" + + s"'$SCAN_NATIVE_ICEBERG_COMPAT' is a native implementation that exposes apis to read " + + "parquet columns natively.") + .internal() + .stringConf + .transform(_.toLowerCase(Locale.ROOT)) + .checkValues(Set(SCAN_NATIVE_COMET, SCAN_NATIVE_DATAFUSION, SCAN_NATIVE_ICEBERG_COMPAT)) + .createWithDefault(sys.env + .getOrElse("COMET_PARQUET_SCAN_IMPL", SCAN_NATIVE_COMET) + .toLowerCase(Locale.ROOT)) + val COMET_PARQUET_PARALLEL_IO_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.parquet.read.parallel.io.enabled") .doc( diff --git a/common/src/main/scala/org/apache/spark/sql/comet/CometArrowUtils.scala b/common/src/main/scala/org/apache/spark/sql/comet/CometArrowUtils.scala new file mode 100644 index 0000000000..2f4f55fc0b --- /dev/null +++ b/common/src/main/scala/org/apache/spark/sql/comet/CometArrowUtils.scala @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.comet + +import scala.collection.JavaConverters._ + +import org.apache.arrow.memory.RootAllocator +import org.apache.arrow.vector.complex.MapVector +import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, IntervalUnit, TimeUnit} +import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +object CometArrowUtils { + + val rootAllocator = new RootAllocator(Long.MaxValue) + + // todo: support more types. + + /** Maps data type from Spark to Arrow. NOTE: timeZoneId required for TimestampTypes */ + def toArrowType(dt: DataType, timeZoneId: String): ArrowType = dt match { + case BooleanType => ArrowType.Bool.INSTANCE + case ByteType => new ArrowType.Int(8, true) + case ShortType => new ArrowType.Int(8 * 2, true) + case IntegerType => new ArrowType.Int(8 * 4, true) + case LongType => new ArrowType.Int(8 * 8, true) + case FloatType => new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) + case DoubleType => new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) + case StringType => ArrowType.Utf8.INSTANCE + case BinaryType => ArrowType.Binary.INSTANCE + case DecimalType.Fixed(precision, scale) => new ArrowType.Decimal(precision, scale) + case DateType => new ArrowType.Date(DateUnit.DAY) + case TimestampType if timeZoneId == null => + throw new IllegalStateException("Missing timezoneId where it is mandatory.") + case TimestampType => new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZoneId) + case TimestampNTZType => + new ArrowType.Timestamp(TimeUnit.MICROSECOND, null) + case NullType => ArrowType.Null.INSTANCE + case _: YearMonthIntervalType => new ArrowType.Interval(IntervalUnit.YEAR_MONTH) + case _: DayTimeIntervalType => new ArrowType.Duration(TimeUnit.MICROSECOND) + case _ => + throw new IllegalArgumentException() + } + + def fromArrowType(dt: ArrowType): DataType = dt match { + case ArrowType.Bool.INSTANCE => BooleanType + case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 => ByteType + case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 2 => ShortType + case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 4 => IntegerType + case int: ArrowType.Int if int.getIsSigned && int.getBitWidth == 8 * 8 => LongType + case float: ArrowType.FloatingPoint + if float.getPrecision() == FloatingPointPrecision.SINGLE => + FloatType + case float: ArrowType.FloatingPoint + if float.getPrecision() == FloatingPointPrecision.DOUBLE => + DoubleType + case ArrowType.Utf8.INSTANCE => StringType + case ArrowType.Binary.INSTANCE => BinaryType + case d: ArrowType.Decimal => DecimalType(d.getPrecision, d.getScale) + case date: ArrowType.Date if date.getUnit == DateUnit.DAY => DateType + case ts: ArrowType.Timestamp + if ts.getUnit == TimeUnit.MICROSECOND && ts.getTimezone == null => + TimestampNTZType + case ts: ArrowType.Timestamp if ts.getUnit == TimeUnit.MICROSECOND => TimestampType + case ArrowType.Null.INSTANCE => NullType + case yi: ArrowType.Interval if yi.getUnit == IntervalUnit.YEAR_MONTH => + YearMonthIntervalType() + case di: ArrowType.Duration if di.getUnit == TimeUnit.MICROSECOND => DayTimeIntervalType() + case _ => throw new IllegalArgumentException() + // throw QueryExecutionErrors.unsupportedArrowTypeError(dt) + } + + /** Maps field from Spark to Arrow. NOTE: timeZoneId required for TimestampType */ + def toArrowField(name: String, dt: DataType, nullable: Boolean, timeZoneId: String): Field = { + dt match { + case ArrayType(elementType, containsNull) => + val fieldType = new FieldType(nullable, ArrowType.List.INSTANCE, null) + new Field( + name, + fieldType, + Seq(toArrowField("element", elementType, containsNull, timeZoneId)).asJava) + case StructType(fields) => + val fieldType = new FieldType(nullable, ArrowType.Struct.INSTANCE, null) + new Field( + name, + fieldType, + fields + .map { field => + toArrowField(field.name, field.dataType, field.nullable, timeZoneId) + } + .toSeq + .asJava) + case MapType(keyType, valueType, valueContainsNull) => + val mapType = new FieldType(nullable, new ArrowType.Map(false), null) + // Note: Map Type struct can not be null, Struct Type key field can not be null + new Field( + name, + mapType, + Seq( + toArrowField( + MapVector.DATA_VECTOR_NAME, + new StructType() + .add(MapVector.KEY_NAME, keyType, nullable = false) + .add(MapVector.VALUE_NAME, valueType, nullable = valueContainsNull), + nullable = false, + timeZoneId)).asJava) + case udt: UserDefinedType[_] => toArrowField(name, udt.sqlType, nullable, timeZoneId) + case dataType => + val fieldType = new FieldType(nullable, toArrowType(dataType, timeZoneId), null) + new Field(name, fieldType, Seq.empty[Field].asJava) + } + } + + def fromArrowField(field: Field): DataType = { + field.getType match { + case _: ArrowType.Map => + val elementField = field.getChildren.get(0) + val keyType = fromArrowField(elementField.getChildren.get(0)) + val valueType = fromArrowField(elementField.getChildren.get(1)) + MapType(keyType, valueType, elementField.getChildren.get(1).isNullable) + case ArrowType.List.INSTANCE => + val elementField = field.getChildren().get(0) + val elementType = fromArrowField(elementField) + ArrayType(elementType, containsNull = elementField.isNullable) + case ArrowType.Struct.INSTANCE => + val fields = field.getChildren().asScala.map { child => + val dt = fromArrowField(child) + StructField(child.getName, dt, child.isNullable) + } + StructType(fields.toArray) + case arrowType => fromArrowType(arrowType) + } + } + + /** + * Maps schema from Spark to Arrow. NOTE: timeZoneId required for TimestampType in StructType + */ + def toArrowSchema(schema: StructType, timeZoneId: String): Schema = { + new Schema(schema.map { field => + toArrowField(field.name, field.dataType, field.nullable, timeZoneId) + }.asJava) + } + + def fromArrowSchema(schema: Schema): StructType = { + StructType(schema.getFields.asScala.map { field => + val dt = fromArrowField(field) + StructField(field.getName, dt, field.isNullable) + }.toArray) + } + + /** Return Map with conf settings to be used in ArrowPythonRunner */ + def getPythonRunnerConfMap(conf: SQLConf): Map[String, String] = { + val timeZoneConf = Seq(SQLConf.SESSION_LOCAL_TIMEZONE.key -> conf.sessionLocalTimeZone) + val pandasColsByName = Seq( + SQLConf.PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME.key -> + conf.pandasGroupedMapAssignColumnsByName.toString) + val arrowSafeTypeCheck = Seq( + SQLConf.PANDAS_ARROW_SAFE_TYPE_CONVERSION.key -> + conf.arrowSafeTypeConversion.toString) + Map(timeZoneConf ++ pandasColsByName ++ arrowSafeTypeCheck: _*) + } + +} diff --git a/docs/source/contributor-guide/benchmarking.md b/docs/source/contributor-guide/benchmarking.md index 1193ada625..e2372b3d66 100644 --- a/docs/source/contributor-guide/benchmarking.md +++ b/docs/source/contributor-guide/benchmarking.md @@ -24,6 +24,10 @@ benchmarking documentation and scripts are available in the [DataFusion Benchmar We also have many micro benchmarks that can be run from an IDE located [here](https://github.com/apache/datafusion-comet/tree/main/spark/src/test/scala/org/apache/spark/sql/benchmark). +### TPC-DS + +For TPC-DS, use `spark.comet.exec.replaceSortMergeJoin=false`. + ## Current Benchmark Results - [Benchmarks derived from TPC-H](benchmark-results/tpc-h) diff --git a/native/Cargo.lock b/native/Cargo.lock index 70c8c4295f..918f94a254 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -888,6 +888,7 @@ dependencies = [ "assertables", "async-trait", "bytes", + "chrono", "crc32fast", "criterion", "datafusion", @@ -908,6 +909,7 @@ dependencies = [ "lz4_flex", "mimalloc", "num", + "object_store", "once_cell", "parquet", "paste", @@ -921,6 +923,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", + "url", "zstd 0.11.2+zstd.1.5.2", ] diff --git a/native/Cargo.toml b/native/Cargo.toml index ad5e1a141c..624d63ad2d 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -57,6 +57,8 @@ num = "0.4" rand = "0.8" regex = "1.9.6" thiserror = "1" +object_store = "0.11.0" +url = "2.2" [profile.release] debug = true diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml index 8937236dda..88c81be313 100644 --- a/native/core/Cargo.toml +++ b/native/core/Cargo.toml @@ -62,7 +62,7 @@ bytes = "1.5.0" tempfile = "3.8.0" itertools = "0.11.0" paste = "1.0.14" -datafusion-common = { workspace = true } +datafusion-common = { workspace = true, features= ["object_store"] } datafusion = { workspace = true } datafusion-functions-nested = { workspace = true } datafusion-expr = { workspace = true } @@ -74,6 +74,9 @@ crc32fast = "1.3.2" simd-adler32 = "0.3.7" datafusion-comet-spark-expr = { workspace = true } datafusion-comet-proto = { workspace = true } +object_store = { workspace = true } +url = { workspace = true } +chrono = { workspace = true } [dev-dependencies] pprof = { version = "0.13.0", features = ["flamegraph"] } diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index b3c33b7948..fe29d8da14 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -77,6 +77,8 @@ struct ExecutionContext { pub task_attempt_id: i64, /// The deserialized Spark plan pub spark_plan: Operator, + /// The number of partitions + pub partition_count: usize, /// The DataFusion root operator converted from the `spark_plan` pub root_op: Option>, /// The input sources for the DataFusion plan @@ -156,6 +158,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( id: jlong, iterators: jobjectArray, serialized_query: jbyteArray, + partition_count: jint, metrics_node: JObject, comet_task_memory_manager_obj: JObject, batch_size: jint, @@ -223,6 +226,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_createPlan( id, task_attempt_id, spark_plan, + partition_count: partition_count as usize, root_op: None, scans: vec![], input_sources, @@ -472,6 +476,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( let (scans, root_op) = planner.create_plan( &exec_context.spark_plan, &mut exec_context.input_sources.clone(), + exec_context.partition_count, )?; let physical_plan_time = start.elapsed(); @@ -491,7 +496,7 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( .as_ref() .unwrap() .native_plan - .execute(0, task_ctx)?; + .execute(partition as usize, task_ctx)?; exec_context.stream = Some(stream); } else { // Pull input batches diff --git a/native/core/src/execution/operators/filter.rs b/native/core/src/execution/operators/filter.rs index eab30a3560..0312f869e7 100644 --- a/native/core/src/execution/operators/filter.rs +++ b/native/core/src/execution/operators/filter.rs @@ -228,7 +228,7 @@ impl DisplayAs for FilterExec { impl ExecutionPlan for FilterExec { fn name(&self) -> &'static str { - "FilterExec" + "CometFilterExec" } /// Return a reference to Any that can be used for downcasting diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index 27a0ad58e7..ccf7e31e4e 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -71,6 +71,11 @@ use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctio use crate::execution::shuffle::CompressionCodec; use crate::execution::spark_plan::SparkPlan; +use crate::parquet::parquet_support::SparkParquetOptions; +use crate::parquet::schema_adapter::SparkSchemaAdapterFactory; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder; +use datafusion::datasource::physical_plan::FileScanConfig; use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_comet_proto::{ spark_expression::{ @@ -91,11 +96,13 @@ use datafusion_comet_spark_expr::{ SparkCastOptions, StartsWith, Stddev, StringSpaceExpr, SubstringExpr, SumDecimal, TimestampTruncExpr, ToJson, UnboundColumn, Variance, }; +use datafusion_common::config::TableParquetOptions; use datafusion_common::scalar::ScalarStructBuilder; use datafusion_common::{ tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter}, JoinType as DFJoinType, ScalarValue, }; +use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::{ AggregateUDF, ScalarUDF, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, @@ -107,8 +114,10 @@ use datafusion_physical_expr::LexOrdering; use itertools::Itertools; use jni::objects::GlobalRef; use num::{BigInt, ToPrimitive}; +use object_store::path::Path; use std::cmp::max; use std::{collections::HashMap, sync::Arc}; +use url::Url; // For clippy error on type_complexity. type PhyAggResult = Result, ExecutionError>; @@ -897,12 +906,13 @@ impl PhysicalPlanner { &'a self, spark_plan: &'a Operator, inputs: &mut Vec>, + partition_count: usize, ) -> Result<(Vec, Arc), ExecutionError> { let children = &spark_plan.children; match spark_plan.op_struct.as_ref().unwrap() { OpStruct::Projection(project) => { assert!(children.len() == 1); - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let exprs: PhyExprResult = project .project_list .iter() @@ -923,7 +933,7 @@ impl PhysicalPlanner { } OpStruct::Filter(filter) => { assert!(children.len() == 1); - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let predicate = self.create_expr(filter.predicate.as_ref().unwrap(), child.schema())?; @@ -938,7 +948,7 @@ impl PhysicalPlanner { } OpStruct::HashAgg(agg) => { assert!(children.len() == 1); - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let group_exprs: PhyExprResult = agg .grouping_exprs @@ -1017,7 +1027,7 @@ impl PhysicalPlanner { OpStruct::Limit(limit) => { assert!(children.len() == 1); let num = limit.limit; - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let limit = Arc::new(LocalLimitExec::new( Arc::clone(&child.native_plan), @@ -1030,7 +1040,7 @@ impl PhysicalPlanner { } OpStruct::Sort(sort) => { assert!(children.len() == 1); - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let exprs: Result, ExecutionError> = sort .sort_orders @@ -1060,6 +1070,148 @@ impl PhysicalPlanner { )), )) } + OpStruct::NativeScan(scan) => { + let data_schema = convert_spark_types_to_arrow_schema(scan.data_schema.as_slice()); + let required_schema: SchemaRef = + convert_spark_types_to_arrow_schema(scan.required_schema.as_slice()); + let partition_schema: SchemaRef = + convert_spark_types_to_arrow_schema(scan.partition_schema.as_slice()); + let projection_vector: Vec = scan + .projection_vector + .iter() + .map(|offset| *offset as usize) + .collect(); + + // Convert the Spark expressions to Physical expressions + let data_filters: Result>, ExecutionError> = scan + .data_filters + .iter() + .map(|expr| self.create_expr(expr, Arc::clone(&required_schema))) + .collect(); + + // Create a conjunctive form of the vector because ParquetExecBuilder takes + // a single expression + let data_filters = data_filters?; + let cnf_data_filters = data_filters.clone().into_iter().reduce(|left, right| { + Arc::new(BinaryExpr::new( + left, + datafusion::logical_expr::Operator::And, + right, + )) + }); + + let object_store = object_store::local::LocalFileSystem::new(); + // register the object store with the runtime environment + let url = Url::try_from("file://").unwrap(); + self.session_ctx + .runtime_env() + .register_object_store(&url, Arc::new(object_store)); + + // Generate file groups + let mut file_groups: Vec> = + Vec::with_capacity(partition_count); + scan.file_partitions.iter().try_for_each(|partition| { + let mut files = Vec::with_capacity(partition.partitioned_file.len()); + partition.partitioned_file.iter().try_for_each(|file| { + assert!(file.start + file.length <= file.file_size); + + let mut partitioned_file = PartitionedFile::new_with_range( + String::new(), // Dummy file path. + file.file_size as u64, + file.start, + file.start + file.length, + ); + + // Spark sends the path over as URL-encoded, parse that first. + let url = Url::parse(file.file_path.as_ref()).unwrap(); + // Convert that to a Path object to use in the PartitionedFile. + let path = Path::from_url_path(url.path()).unwrap(); + partitioned_file.object_meta.location = path; + + // Process partition values + // Create an empty input schema for partition values because they are all literals. + let empty_schema = Arc::new(Schema::empty()); + let partition_values: Result, _> = file + .partition_values + .iter() + .map(|partition_value| { + let literal = self.create_expr( + partition_value, + Arc::::clone(&empty_schema), + )?; + literal + .as_any() + .downcast_ref::() + .ok_or_else(|| { + ExecutionError::GeneralError( + "Expected literal of partition value".to_string(), + ) + }) + .map(|literal| literal.value().clone()) + }) + .collect(); + let partition_values = partition_values?; + + partitioned_file.partition_values = partition_values; + + files.push(partitioned_file); + Ok::<(), ExecutionError>(()) + })?; + + file_groups.push(files); + Ok::<(), ExecutionError>(()) + })?; + + // TODO: I think we can remove partition_count in the future, but leave for testing. + assert_eq!(file_groups.len(), partition_count); + + let object_store_url = ObjectStoreUrl::local_filesystem(); + let partition_fields: Vec = partition_schema + .fields() + .iter() + .map(|field| { + Field::new(field.name(), field.data_type().clone(), field.is_nullable()) + }) + .collect_vec(); + let mut file_scan_config = + FileScanConfig::new(object_store_url, Arc::clone(&data_schema)) + .with_file_groups(file_groups) + .with_table_partition_cols(partition_fields); + + assert_eq!( + projection_vector.len(), + required_schema.fields.len() + partition_schema.fields.len() + ); + file_scan_config = file_scan_config.with_projection(Some(projection_vector)); + + let mut table_parquet_options = TableParquetOptions::new(); + // TODO: Maybe these are configs? + table_parquet_options.global.pushdown_filters = true; + table_parquet_options.global.reorder_filters = true; + + let mut spark_parquet_options = SparkParquetOptions::new( + EvalMode::Legacy, + scan.session_timezone.as_str(), + false, + ); + spark_parquet_options.allow_cast_unsigned_ints = true; + + let mut builder = ParquetExecBuilder::new(file_scan_config) + .with_table_parquet_options(table_parquet_options) + .with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new( + spark_parquet_options, + ))); + + if let Some(filter) = cnf_data_filters { + builder = builder.with_predicate(filter); + } + + let scan = builder.build(); + Ok(( + vec![], + Arc::new(SparkPlan::new(spark_plan.plan_id, Arc::new(scan), vec![])), + )) + } OpStruct::Scan(scan) => { let data_types = scan.fields.iter().map(to_arrow_datatype).collect_vec(); @@ -1090,7 +1242,7 @@ impl PhysicalPlanner { } OpStruct::ShuffleWriter(writer) => { assert!(children.len() == 1); - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let partitioning = self .create_partitioning(writer.partitioning.as_ref().unwrap(), child.schema())?; @@ -1128,7 +1280,7 @@ impl PhysicalPlanner { } OpStruct::Expand(expand) => { assert!(children.len() == 1); - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let mut projections = vec![]; let mut projection = vec![]; @@ -1189,6 +1341,7 @@ impl PhysicalPlanner { &join.right_join_keys, join.join_type, &join.condition, + partition_count, )?; let sort_options = join @@ -1262,6 +1415,7 @@ impl PhysicalPlanner { &join.right_join_keys, join.join_type, &join.condition, + partition_count, )?; // HashJoinExec may cache the input batch internally. We need @@ -1316,7 +1470,7 @@ impl PhysicalPlanner { } } OpStruct::Window(wnd) => { - let (scans, child) = self.create_plan(&children[0], inputs)?; + let (scans, child) = self.create_plan(&children[0], inputs, partition_count)?; let input_schema = child.schema(); let sort_exprs: Result, ExecutionError> = wnd .order_by_list @@ -1369,10 +1523,11 @@ impl PhysicalPlanner { right_join_keys: &[Expr], join_type: i32, condition: &Option, + partition_count: usize, ) -> Result<(JoinParameters, Vec), ExecutionError> { assert!(children.len() == 2); - let (mut left_scans, left) = self.create_plan(&children[0], inputs)?; - let (mut right_scans, right) = self.create_plan(&children[1], inputs)?; + let (mut left_scans, left) = self.create_plan(&children[0], inputs, partition_count)?; + let (mut right_scans, right) = self.create_plan(&children[1], inputs, partition_count)?; left_scans.append(&mut right_scans); @@ -2325,6 +2480,23 @@ fn from_protobuf_eval_mode(value: i32) -> Result { } } +fn convert_spark_types_to_arrow_schema( + spark_types: &[spark_operator::SparkStructField], +) -> SchemaRef { + let arrow_fields = spark_types + .iter() + .map(|spark_type| { + Field::new( + String::clone(&spark_type.name), + to_arrow_datatype(spark_type.data_type.as_ref().unwrap()), + spark_type.nullable, + ) + }) + .collect_vec(); + let arrow_schema: SchemaRef = Arc::new(Schema::new(arrow_fields)); + arrow_schema +} + #[cfg(test)] mod tests { use std::{sync::Arc, task::Poll}; @@ -2371,7 +2543,7 @@ mod tests { let input_array = DictionaryArray::new(keys, Arc::new(values)); let input_batch = InputBatch::Batch(vec![Arc::new(input_array)], row_count); - let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![]).unwrap(); + let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![], 1).unwrap(); scans[0].set_input_batch(input_batch); let session_ctx = SessionContext::new(); @@ -2453,7 +2625,7 @@ mod tests { let input_array = DictionaryArray::new(keys, Arc::new(values)); let input_batch = InputBatch::Batch(vec![Arc::new(input_array)], row_count); - let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![]).unwrap(); + let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![], 1).unwrap(); // Scan's schema is determined by the input batch, so we need to set it before execution. scans[0].set_input_batch(input_batch); @@ -2513,7 +2685,7 @@ mod tests { let op = create_filter(op_scan, 0); let planner = PhysicalPlanner::default(); - let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![]).unwrap(); + let (mut scans, datafusion_plan) = planner.create_plan(&op, &mut vec![], 1).unwrap(); let scan = &mut scans[0]; scan.set_input_batch(InputBatch::EOF); @@ -2592,9 +2764,9 @@ mod tests { let op = create_filter(op_scan, 0); let planner = PhysicalPlanner::default(); - let (mut _scans, filter_exec) = planner.create_plan(&op, &mut vec![]).unwrap(); + let (mut _scans, filter_exec) = planner.create_plan(&op, &mut vec![], 1).unwrap(); - assert_eq!("FilterExec", filter_exec.native_plan.name()); + assert_eq!("CometFilterExec", filter_exec.native_plan.name()); assert_eq!(1, filter_exec.children.len()); assert_eq!(0, filter_exec.additional_native_plans.len()); } @@ -2616,7 +2788,7 @@ mod tests { let planner = PhysicalPlanner::default(); - let (_scans, hash_join_exec) = planner.create_plan(&op_join, &mut vec![]).unwrap(); + let (_scans, hash_join_exec) = planner.create_plan(&op_join, &mut vec![], 1).unwrap(); assert_eq!("HashJoinExec", hash_join_exec.native_plan.name()); assert_eq!(2, hash_join_exec.children.len()); diff --git a/native/core/src/parquet/mod.rs b/native/core/src/parquet/mod.rs index 6bea31f441..b0a74864cf 100644 --- a/native/core/src/parquet/mod.rs +++ b/native/core/src/parquet/mod.rs @@ -21,8 +21,11 @@ pub use mutable_vector::*; #[macro_use] pub mod util; +pub mod parquet_support; pub mod read; +pub mod schema_adapter; +use std::task::Poll; use std::{boxed::Box, ptr::NonNull, sync::Arc}; use crate::errors::{try_unwrap_or_throw, CometError}; @@ -39,14 +42,26 @@ use jni::{ }, }; +use self::util::jni::TypePromotionInfo; +use crate::execution::operators::ExecutionError; use crate::execution::utils::SparkArrowConvert; +use crate::parquet::data_type::AsBytes; +use crate::parquet::parquet_support::SparkParquetOptions; +use crate::parquet::schema_adapter::SparkSchemaAdapterFactory; use arrow::buffer::{Buffer, MutableBuffer}; -use jni::objects::{JBooleanArray, JLongArray, JPrimitiveArray, ReleaseMode}; +use arrow_array::{Array, RecordBatch}; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder; +use datafusion::datasource::physical_plan::FileScanConfig; +use datafusion::physical_plan::ExecutionPlan; +use datafusion_comet_spark_expr::EvalMode; +use datafusion_common::config::TableParquetOptions; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use futures::{poll, StreamExt}; +use jni::objects::{JBooleanArray, JByteArray, JLongArray, JPrimitiveArray, JString, ReleaseMode}; +use jni::sys::jstring; use read::ColumnReader; -use util::jni::{convert_column_descriptor, convert_encoding}; - -use self::util::jni::TypePromotionInfo; - +use util::jni::{convert_column_descriptor, convert_encoding, deserialize_schema, get_file_path}; /// Parquet read context maintained across multiple JNI calls. struct Context { pub column_reader: ColumnReader, @@ -580,3 +595,205 @@ fn from_u8_slice(src: &mut [u8]) -> &mut [i8] { let raw_ptr = src.as_mut_ptr() as *mut i8; unsafe { std::slice::from_raw_parts_mut(raw_ptr, src.len()) } } + +// TODO: (ARROW NATIVE) remove this if not needed. +enum ParquetReaderState { + Init, + Reading, + Complete, +} +/// Parquet read context maintained across multiple JNI calls. +struct BatchContext { + runtime: tokio::runtime::Runtime, + batch_stream: Option, + current_batch: Option, + reader_state: ParquetReaderState, +} + +#[inline] +fn get_batch_context<'a>(handle: jlong) -> Result<&'a mut BatchContext, CometError> { + unsafe { + (handle as *mut BatchContext) + .as_mut() + .ok_or_else(|| CometError::NullPointer("null batch context handle".to_string())) + } +} + +/* +#[inline] +fn get_batch_reader<'a>(handle: jlong) -> Result<&'a mut ParquetRecordBatchReader, CometError> { + Ok(&mut get_batch_context(handle)?.batch_reader.unwrap()) +} +*/ + +/// # Safety +/// This function is inherently unsafe since it deals with raw pointers passed from JNI. +#[no_mangle] +pub unsafe extern "system" fn Java_org_apache_comet_parquet_Native_initRecordBatchReader( + e: JNIEnv, + _jclass: JClass, + file_path: jstring, + file_size: jlong, + start: jlong, + length: jlong, + required_schema: jbyteArray, + session_timezone: jstring, +) -> jlong { + try_unwrap_or_throw(&e, |mut env| unsafe { + let path: String = env + .get_string(&JString::from_raw(file_path)) + .unwrap() + .into(); + let batch_stream: Option; + // TODO: (ARROW NATIVE) Use the common global runtime + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + + // EXPERIMENTAL - BEGIN + //TODO: Need an execution context and a spark plan equivalent so that we can reuse + // code from jni_api.rs + let (object_store_url, object_store_path) = get_file_path(path.clone()).unwrap(); + // TODO: (ARROW NATIVE) - Remove code duplication between this and POC 1 + // copy the input on-heap buffer to native + let required_schema_array = JByteArray::from_raw(required_schema); + let required_schema_buffer = env.convert_byte_array(&required_schema_array)?; + let required_schema_arrow = deserialize_schema(required_schema_buffer.as_bytes())?; + let mut partitioned_file = PartitionedFile::new_with_range( + String::new(), // Dummy file path. We will override this with our path so that url encoding does not occur + file_size as u64, + start, + start + length, + ); + partitioned_file.object_meta.location = object_store_path; + // We build the file scan config with the *required* schema so that the reader knows + // the output schema we want + let file_scan_config = FileScanConfig::new(object_store_url, Arc::new(required_schema_arrow)) + .with_file(partitioned_file) + // TODO: (ARROW NATIVE) - do partition columns in native + // - will need partition schema and partition values to do so + // .with_table_partition_cols(partition_fields) + ; + let mut table_parquet_options = TableParquetOptions::new(); + // TODO: Maybe these are configs? + table_parquet_options.global.pushdown_filters = true; + table_parquet_options.global.reorder_filters = true; + let session_timezone: String = env + .get_string(&JString::from_raw(session_timezone)) + .unwrap() + .into(); + + let mut spark_parquet_options = + SparkParquetOptions::new(EvalMode::Legacy, session_timezone.as_str(), false); + spark_parquet_options.allow_cast_unsigned_ints = true; + + let builder2 = ParquetExecBuilder::new(file_scan_config) + .with_table_parquet_options(table_parquet_options) + .with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new( + spark_parquet_options, + ))); + + //TODO: (ARROW NATIVE) - predicate pushdown?? + // builder = builder.with_predicate(filter); + + let scan = builder2.build(); + let ctx = TaskContext::default(); + let partition_index: usize = 0; + batch_stream = Some(scan.execute(partition_index, Arc::new(ctx))?); + + // EXPERIMENTAL - END + + let ctx = BatchContext { + runtime, + batch_stream, + current_batch: None, + reader_state: ParquetReaderState::Init, + }; + let res = Box::new(ctx); + Ok(Box::into_raw(res) as i64) + }) +} + +#[no_mangle] +pub extern "system" fn Java_org_apache_comet_parquet_Native_readNextRecordBatch( + e: JNIEnv, + _jclass: JClass, + handle: jlong, +) -> jint { + try_unwrap_or_throw(&e, |_env| { + let context = get_batch_context(handle)?; + let mut rows_read: i32 = 0; + let batch_stream = context.batch_stream.as_mut().unwrap(); + let runtime = &context.runtime; + + loop { + let next_item = batch_stream.next(); + let poll_batch: Poll>> = + runtime.block_on(async { poll!(next_item) }); + + match poll_batch { + Poll::Ready(Some(batch)) => { + let batch = batch?; + rows_read = batch.num_rows() as i32; + context.current_batch = Some(batch); + context.reader_state = ParquetReaderState::Reading; + break; + } + Poll::Ready(None) => { + // EOF + + // TODO: (ARROW NATIVE) We can update metrics here + // crate::execution::jni_api::update_metrics(&mut env, exec_context)?; + + context.current_batch = None; + context.reader_state = ParquetReaderState::Complete; + break; + } + Poll::Pending => { + // TODO: (ARROW NATIVE): Just keeping polling?? + // Ideally we want to yield to avoid consuming CPU while blocked on IO ?? + continue; + } + } + } + Ok(rows_read) + }) +} + +#[no_mangle] +pub extern "system" fn Java_org_apache_comet_parquet_Native_currentColumnBatch( + e: JNIEnv, + _jclass: JClass, + handle: jlong, + column_idx: jint, + array_addr: jlong, + schema_addr: jlong, +) { + try_unwrap_or_throw(&e, |_env| { + let context = get_batch_context(handle)?; + let batch_reader = context + .current_batch + .as_mut() + .ok_or_else(|| CometError::Execution { + source: ExecutionError::GeneralError("There is no more data to read".to_string()), + }); + let data = batch_reader?.column(column_idx as usize).into_data(); + data.move_to_spark(array_addr, schema_addr) + .map_err(|e| e.into()) + }) +} + +#[no_mangle] +pub extern "system" fn Java_org_apache_comet_parquet_Native_closeRecordBatchReader( + env: JNIEnv, + _jclass: JClass, + handle: jlong, +) { + try_unwrap_or_throw(&env, |_| { + unsafe { + let ctx = handle as *mut BatchContext; + let _ = Box::from_raw(ctx); + }; + Ok(()) + }) +} diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs new file mode 100644 index 0000000000..248f2babd6 --- /dev/null +++ b/native/core/src/parquet/parquet_support.rs @@ -0,0 +1,2338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::{ + array::{ + cast::AsArray, + types::{Date32Type, Int16Type, Int32Type, Int8Type}, + Array, ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, + GenericStringArray, Int16Array, Int32Array, Int64Array, Int8Array, OffsetSizeTrait, + PrimitiveArray, + }, + compute::{cast_with_options, take, unary, CastOptions}, + datatypes::{ + ArrowPrimitiveType, Decimal128Type, DecimalType, Float32Type, Float64Type, Int64Type, + TimestampMicrosecondType, + }, + error::ArrowError, + util::display::FormatOptions, +}; +use arrow_array::builder::StringBuilder; +use arrow_array::{DictionaryArray, StringArray, StructArray}; +use arrow_schema::DataType; +use chrono::{NaiveDate, NaiveDateTime, TimeZone, Timelike}; +use datafusion_comet_spark_expr::utils::array_with_timezone; +use datafusion_comet_spark_expr::{timezone, EvalMode, SparkError, SparkResult}; +use datafusion_common::{cast::as_generic_string_array, Result as DataFusionResult, ScalarValue}; +use datafusion_expr::ColumnarValue; +// use datafusion_physical_expr::PhysicalExpr; +use num::{ + cast::AsPrimitive, integer::div_floor, traits::CheckedNeg, CheckedSub, Integer, Num, + ToPrimitive, +}; +use regex::Regex; +use std::collections::HashMap; +use std::str::FromStr; +use std::{fmt::Debug, hash::Hash, num::Wrapping, sync::Arc}; + +static TIMESTAMP_FORMAT: Option<&str> = Some("%Y-%m-%d %H:%M:%S%.f"); + +const MICROS_PER_SECOND: i64 = 1000000; + +static PARQUET_OPTIONS: CastOptions = CastOptions { + safe: true, + format_options: FormatOptions::new() + .with_timestamp_tz_format(TIMESTAMP_FORMAT) + .with_timestamp_format(TIMESTAMP_FORMAT), +}; + +struct TimeStampInfo { + year: i32, + month: u32, + day: u32, + hour: u32, + minute: u32, + second: u32, + microsecond: u32, +} + +impl Default for TimeStampInfo { + fn default() -> Self { + TimeStampInfo { + year: 1, + month: 1, + day: 1, + hour: 0, + minute: 0, + second: 0, + microsecond: 0, + } + } +} + +impl TimeStampInfo { + pub fn with_year(&mut self, year: i32) -> &mut Self { + self.year = year; + self + } + + pub fn with_month(&mut self, month: u32) -> &mut Self { + self.month = month; + self + } + + pub fn with_day(&mut self, day: u32) -> &mut Self { + self.day = day; + self + } + + pub fn with_hour(&mut self, hour: u32) -> &mut Self { + self.hour = hour; + self + } + + pub fn with_minute(&mut self, minute: u32) -> &mut Self { + self.minute = minute; + self + } + + pub fn with_second(&mut self, second: u32) -> &mut Self { + self.second = second; + self + } + + pub fn with_microsecond(&mut self, microsecond: u32) -> &mut Self { + self.microsecond = microsecond; + self + } +} + +macro_rules! cast_utf8_to_int { + ($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident) => {{ + let len = $array.len(); + let mut cast_array = PrimitiveArray::<$array_type>::builder(len); + for i in 0..len { + if $array.is_null(i) { + cast_array.append_null() + } else if let Some(cast_value) = $cast_method($array.value(i), $eval_mode)? { + cast_array.append_value(cast_value); + } else { + cast_array.append_null() + } + } + let result: SparkResult = Ok(Arc::new(cast_array.finish()) as ArrayRef); + result + }}; +} +macro_rules! cast_utf8_to_timestamp { + ($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident, $tz:expr) => {{ + let len = $array.len(); + let mut cast_array = PrimitiveArray::<$array_type>::builder(len).with_timezone("UTC"); + for i in 0..len { + if $array.is_null(i) { + cast_array.append_null() + } else if let Ok(Some(cast_value)) = + $cast_method($array.value(i).trim(), $eval_mode, $tz) + { + cast_array.append_value(cast_value); + } else { + cast_array.append_null() + } + } + let result: ArrayRef = Arc::new(cast_array.finish()) as ArrayRef; + result + }}; +} + +macro_rules! cast_float_to_string { + ($from:expr, $eval_mode:expr, $type:ty, $output_type:ty, $offset_type:ty) => {{ + + fn cast( + from: &dyn Array, + _eval_mode: EvalMode, + ) -> SparkResult + where + OffsetSize: OffsetSizeTrait, { + let array = from.as_any().downcast_ref::<$output_type>().unwrap(); + + // If the absolute number is less than 10,000,000 and greater or equal than 0.001, the + // result is expressed without scientific notation with at least one digit on either side of + // the decimal point. Otherwise, Spark uses a mantissa followed by E and an + // exponent. The mantissa has an optional leading minus sign followed by one digit to the + // left of the decimal point, and the minimal number of digits greater than zero to the + // right. The exponent has and optional leading minus sign. + // source: https://docs.databricks.com/en/sql/language-manual/functions/cast.html + + const LOWER_SCIENTIFIC_BOUND: $type = 0.001; + const UPPER_SCIENTIFIC_BOUND: $type = 10000000.0; + + let output_array = array + .iter() + .map(|value| match value { + Some(value) if value == <$type>::INFINITY => Ok(Some("Infinity".to_string())), + Some(value) if value == <$type>::NEG_INFINITY => Ok(Some("-Infinity".to_string())), + Some(value) + if (value.abs() < UPPER_SCIENTIFIC_BOUND + && value.abs() >= LOWER_SCIENTIFIC_BOUND) + || value.abs() == 0.0 => + { + let trailing_zero = if value.fract() == 0.0 { ".0" } else { "" }; + + Ok(Some(format!("{value}{trailing_zero}"))) + } + Some(value) + if value.abs() >= UPPER_SCIENTIFIC_BOUND + || value.abs() < LOWER_SCIENTIFIC_BOUND => + { + let formatted = format!("{value:E}"); + + if formatted.contains(".") { + Ok(Some(formatted)) + } else { + // `formatted` is already in scientific notation and can be split up by E + // in order to add the missing trailing 0 which gets removed for numbers with a fraction of 0.0 + let prepare_number: Vec<&str> = formatted.split("E").collect(); + + let coefficient = prepare_number[0]; + + let exponent = prepare_number[1]; + + Ok(Some(format!("{coefficient}.0E{exponent}"))) + } + } + Some(value) => Ok(Some(value.to_string())), + _ => Ok(None), + }) + .collect::, SparkError>>()?; + + Ok(Arc::new(output_array)) + } + + cast::<$offset_type>($from, $eval_mode) + }}; +} + +macro_rules! cast_int_to_int_macro { + ( + $array: expr, + $eval_mode:expr, + $from_arrow_primitive_type: ty, + $to_arrow_primitive_type: ty, + $from_data_type: expr, + $to_native_type: ty, + $spark_from_data_type_name: expr, + $spark_to_data_type_name: expr + ) => {{ + let cast_array = $array + .as_any() + .downcast_ref::>() + .unwrap(); + let spark_int_literal_suffix = match $from_data_type { + &DataType::Int64 => "L", + &DataType::Int16 => "S", + &DataType::Int8 => "T", + _ => "", + }; + + let output_array = match $eval_mode { + EvalMode::Legacy => cast_array + .iter() + .map(|value| match value { + Some(value) => { + Ok::, SparkError>(Some(value as $to_native_type)) + } + _ => Ok(None), + }) + .collect::, _>>(), + _ => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let res = <$to_native_type>::try_from(value); + if res.is_err() { + Err(cast_overflow( + &(value.to_string() + spark_int_literal_suffix), + $spark_from_data_type_name, + $spark_to_data_type_name, + )) + } else { + Ok::, SparkError>(Some(res.unwrap())) + } + } + _ => Ok(None), + }) + .collect::, _>>(), + }?; + let result: SparkResult = Ok(Arc::new(output_array) as ArrayRef); + result + }}; +} + +// When Spark casts to Byte/Short Types, it does not cast directly to Byte/Short. +// It casts to Int first and then to Byte/Short. Because of potential overflows in the Int cast, +// this can cause unexpected Short/Byte cast results. Replicate this behavior. +macro_rules! cast_float_to_int16_down { + ( + $array:expr, + $eval_mode:expr, + $src_array_type:ty, + $dest_array_type:ty, + $rust_src_type:ty, + $rust_dest_type:ty, + $src_type_str:expr, + $dest_type_str:expr, + $format_str:expr + ) => {{ + let cast_array = $array + .as_any() + .downcast_ref::<$src_array_type>() + .expect(concat!("Expected a ", stringify!($src_array_type))); + + let output_array = match $eval_mode { + EvalMode::Ansi => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let is_overflow = value.is_nan() || value.abs() as i32 == i32::MAX; + if is_overflow { + return Err(cast_overflow( + &format!($format_str, value).replace("e", "E"), + $src_type_str, + $dest_type_str, + )); + } + let i32_value = value as i32; + <$rust_dest_type>::try_from(i32_value) + .map_err(|_| { + cast_overflow( + &format!($format_str, value).replace("e", "E"), + $src_type_str, + $dest_type_str, + ) + }) + .map(Some) + } + None => Ok(None), + }) + .collect::>()?, + _ => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let i32_value = value as i32; + Ok::, SparkError>(Some( + i32_value as $rust_dest_type, + )) + } + None => Ok(None), + }) + .collect::>()?, + }; + Ok(Arc::new(output_array) as ArrayRef) + }}; +} + +macro_rules! cast_float_to_int32_up { + ( + $array:expr, + $eval_mode:expr, + $src_array_type:ty, + $dest_array_type:ty, + $rust_src_type:ty, + $rust_dest_type:ty, + $src_type_str:expr, + $dest_type_str:expr, + $max_dest_val:expr, + $format_str:expr + ) => {{ + let cast_array = $array + .as_any() + .downcast_ref::<$src_array_type>() + .expect(concat!("Expected a ", stringify!($src_array_type))); + + let output_array = match $eval_mode { + EvalMode::Ansi => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let is_overflow = + value.is_nan() || value.abs() as $rust_dest_type == $max_dest_val; + if is_overflow { + return Err(cast_overflow( + &format!($format_str, value).replace("e", "E"), + $src_type_str, + $dest_type_str, + )); + } + Ok(Some(value as $rust_dest_type)) + } + None => Ok(None), + }) + .collect::>()?, + _ => cast_array + .iter() + .map(|value| match value { + Some(value) => { + Ok::, SparkError>(Some(value as $rust_dest_type)) + } + None => Ok(None), + }) + .collect::>()?, + }; + Ok(Arc::new(output_array) as ArrayRef) + }}; +} + +// When Spark casts to Byte/Short Types, it does not cast directly to Byte/Short. +// It casts to Int first and then to Byte/Short. Because of potential overflows in the Int cast, +// this can cause unexpected Short/Byte cast results. Replicate this behavior. +macro_rules! cast_decimal_to_int16_down { + ( + $array:expr, + $eval_mode:expr, + $dest_array_type:ty, + $rust_dest_type:ty, + $dest_type_str:expr, + $precision:expr, + $scale:expr + ) => {{ + let cast_array = $array + .as_any() + .downcast_ref::() + .expect(concat!("Expected a Decimal128ArrayType")); + + let output_array = match $eval_mode { + EvalMode::Ansi => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let divisor = 10_i128.pow($scale as u32); + let (truncated, decimal) = (value / divisor, (value % divisor).abs()); + let is_overflow = truncated.abs() > i32::MAX.into(); + if is_overflow { + return Err(cast_overflow( + &format!("{}.{}BD", truncated, decimal), + &format!("DECIMAL({},{})", $precision, $scale), + $dest_type_str, + )); + } + let i32_value = truncated as i32; + <$rust_dest_type>::try_from(i32_value) + .map_err(|_| { + cast_overflow( + &format!("{}.{}BD", truncated, decimal), + &format!("DECIMAL({},{})", $precision, $scale), + $dest_type_str, + ) + }) + .map(Some) + } + None => Ok(None), + }) + .collect::>()?, + _ => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let divisor = 10_i128.pow($scale as u32); + let i32_value = (value / divisor) as i32; + Ok::, SparkError>(Some( + i32_value as $rust_dest_type, + )) + } + None => Ok(None), + }) + .collect::>()?, + }; + Ok(Arc::new(output_array) as ArrayRef) + }}; +} + +macro_rules! cast_decimal_to_int32_up { + ( + $array:expr, + $eval_mode:expr, + $dest_array_type:ty, + $rust_dest_type:ty, + $dest_type_str:expr, + $max_dest_val:expr, + $precision:expr, + $scale:expr + ) => {{ + let cast_array = $array + .as_any() + .downcast_ref::() + .expect(concat!("Expected a Decimal128ArrayType")); + + let output_array = match $eval_mode { + EvalMode::Ansi => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let divisor = 10_i128.pow($scale as u32); + let (truncated, decimal) = (value / divisor, (value % divisor).abs()); + let is_overflow = truncated.abs() > $max_dest_val.into(); + if is_overflow { + return Err(cast_overflow( + &format!("{}.{}BD", truncated, decimal), + &format!("DECIMAL({},{})", $precision, $scale), + $dest_type_str, + )); + } + Ok(Some(truncated as $rust_dest_type)) + } + None => Ok(None), + }) + .collect::>()?, + _ => cast_array + .iter() + .map(|value| match value { + Some(value) => { + let divisor = 10_i128.pow($scale as u32); + let truncated = value / divisor; + Ok::, SparkError>(Some( + truncated as $rust_dest_type, + )) + } + None => Ok(None), + }) + .collect::>()?, + }; + Ok(Arc::new(output_array) as ArrayRef) + }}; +} + +/// Spark cast options +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct SparkParquetOptions { + /// Spark evaluation mode + pub eval_mode: EvalMode, + /// When cast from/to timezone related types, we need timezone, which will be resolved with + /// session local timezone by an analyzer in Spark. + // TODO we should change timezone to Tz to avoid repeated parsing + pub timezone: String, + /// Allow casts that are supported but not guaranteed to be 100% compatible + pub allow_incompat: bool, + /// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter) + pub allow_cast_unsigned_ints: bool, + /// We also use the cast logic for adapting Parquet schemas, so this flag is used + /// for that use case + pub is_adapting_schema: bool, + /// Whether to always represent decimals using 128 bits. If false, the native reader may represent decimals using 32 or 64 bits, depending on the precision. + pub use_decimal_128: bool, + /// Whether to read dates/timestamps that were written in the legacy hybrid Julian + Gregorian calendar as it is. If false, throw exceptions instead. If the spark type is TimestampNTZ, this should be true. + pub use_legacy_date_timestamp_or_ntz: bool, +} + +impl SparkParquetOptions { + pub fn new(eval_mode: EvalMode, timezone: &str, allow_incompat: bool) -> Self { + Self { + eval_mode, + timezone: timezone.to_string(), + allow_incompat, + allow_cast_unsigned_ints: false, + is_adapting_schema: false, + use_decimal_128: false, + use_legacy_date_timestamp_or_ntz: false, + } + } + + pub fn new_without_timezone(eval_mode: EvalMode, allow_incompat: bool) -> Self { + Self { + eval_mode, + timezone: "".to_string(), + allow_incompat, + allow_cast_unsigned_ints: false, + is_adapting_schema: false, + use_decimal_128: false, + use_legacy_date_timestamp_or_ntz: false, + } + } +} + +/// Spark-compatible cast implementation. Defers to DataFusion's cast where that is known +/// to be compatible, and returns an error when a not supported and not DF-compatible cast +/// is requested. +pub fn spark_parquet_convert( + arg: ColumnarValue, + data_type: &DataType, + parquet_options: &SparkParquetOptions, +) -> DataFusionResult { + match arg { + ColumnarValue::Array(array) => Ok(ColumnarValue::Array(cast_array( + array, + data_type, + parquet_options, + )?)), + ColumnarValue::Scalar(scalar) => { + // Note that normally CAST(scalar) should be fold in Spark JVM side. However, for + // some cases e.g., scalar subquery, Spark will not fold it, so we need to handle it + // here. + let array = scalar.to_array()?; + let scalar = + ScalarValue::try_from_array(&cast_array(array, data_type, parquet_options)?, 0)?; + Ok(ColumnarValue::Scalar(scalar)) + } + } +} + +fn cast_array( + array: ArrayRef, + to_type: &DataType, + parquet_options: &SparkParquetOptions, +) -> DataFusionResult { + use DataType::*; + let array = array_with_timezone(array, parquet_options.timezone.clone(), Some(to_type))?; + let from_type = array.data_type().clone(); + + let array = match &from_type { + Dictionary(key_type, value_type) + if key_type.as_ref() == &Int32 + && (value_type.as_ref() == &Utf8 || value_type.as_ref() == &LargeUtf8) => + { + let dict_array = array + .as_any() + .downcast_ref::>() + .expect("Expected a dictionary array"); + + let casted_dictionary = DictionaryArray::::new( + dict_array.keys().clone(), + cast_array(Arc::clone(dict_array.values()), to_type, parquet_options)?, + ); + + let casted_result = match to_type { + Dictionary(_, _) => Arc::new(casted_dictionary.clone()), + _ => take(casted_dictionary.values().as_ref(), dict_array.keys(), None)?, + }; + return Ok(spark_cast_postprocess(casted_result, &from_type, to_type)); + } + _ => array, + }; + let from_type = array.data_type(); + let eval_mode = parquet_options.eval_mode; + + let cast_result = match (from_type, to_type) { + (Utf8, Boolean) => spark_cast_utf8_to_boolean::(&array, eval_mode), + (LargeUtf8, Boolean) => spark_cast_utf8_to_boolean::(&array, eval_mode), + (Utf8, Timestamp(_, _)) => { + cast_string_to_timestamp(&array, to_type, eval_mode, &parquet_options.timezone) + } + (Utf8, Date32) => cast_string_to_date(&array, to_type, eval_mode), + (Int64, Int32) + | (Int64, Int16) + | (Int64, Int8) + | (Int32, Int16) + | (Int32, Int8) + | (Int16, Int8) + if eval_mode != EvalMode::Try => + { + spark_cast_int_to_int(&array, eval_mode, from_type, to_type) + } + (Utf8, Int8 | Int16 | Int32 | Int64) => { + cast_string_to_int::(to_type, &array, eval_mode) + } + (LargeUtf8, Int8 | Int16 | Int32 | Int64) => { + cast_string_to_int::(to_type, &array, eval_mode) + } + (Float64, Utf8) => spark_cast_float64_to_utf8::(&array, eval_mode), + (Float64, LargeUtf8) => spark_cast_float64_to_utf8::(&array, eval_mode), + (Float32, Utf8) => spark_cast_float32_to_utf8::(&array, eval_mode), + (Float32, LargeUtf8) => spark_cast_float32_to_utf8::(&array, eval_mode), + (Float32, Decimal128(precision, scale)) => { + cast_float32_to_decimal128(&array, *precision, *scale, eval_mode) + } + (Float64, Decimal128(precision, scale)) => { + cast_float64_to_decimal128(&array, *precision, *scale, eval_mode) + } + (Float32, Int8) + | (Float32, Int16) + | (Float32, Int32) + | (Float32, Int64) + | (Float64, Int8) + | (Float64, Int16) + | (Float64, Int32) + | (Float64, Int64) + | (Decimal128(_, _), Int8) + | (Decimal128(_, _), Int16) + | (Decimal128(_, _), Int32) + | (Decimal128(_, _), Int64) + if eval_mode != EvalMode::Try => + { + spark_cast_nonintegral_numeric_to_integral(&array, eval_mode, from_type, to_type) + } + (Struct(_), Utf8) => Ok(casts_struct_to_string(array.as_struct(), parquet_options)?), + (Struct(_), Struct(_)) => Ok(cast_struct_to_struct( + array.as_struct(), + from_type, + to_type, + parquet_options, + )?), + (UInt8 | UInt16 | UInt32 | UInt64, Int8 | Int16 | Int32 | Int64) + if parquet_options.allow_cast_unsigned_ints => + { + Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?) + } + _ if parquet_options.is_adapting_schema + || is_datafusion_spark_compatible( + from_type, + to_type, + parquet_options.allow_incompat, + ) => + { + // use DataFusion cast only when we know that it is compatible with Spark + Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?) + } + _ => { + // we should never reach this code because the Scala code should be checking + // for supported cast operations and falling back to Spark for anything that + // is not yet supported + Err(SparkError::Internal(format!( + "Native cast invoked for unsupported cast from {from_type:?} to {to_type:?}" + ))) + } + }; + Ok(spark_cast_postprocess(cast_result?, from_type, to_type)) +} + +/// Determines if DataFusion supports the given cast in a way that is +/// compatible with Spark +fn is_datafusion_spark_compatible( + from_type: &DataType, + to_type: &DataType, + allow_incompat: bool, +) -> bool { + if from_type == to_type { + return true; + } + match from_type { + DataType::Boolean => matches!( + to_type, + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::Float32 + | DataType::Float64 + | DataType::Utf8 + ), + DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => { + // note that the cast from Int32/Int64 -> Decimal128 here is actually + // not compatible with Spark (no overflow checks) but we have tests that + // rely on this cast working so we have to leave it here for now + matches!( + to_type, + DataType::Boolean + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::Float32 + | DataType::Float64 + | DataType::Decimal128(_, _) + | DataType::Utf8 + ) + } + DataType::Float32 | DataType::Float64 => matches!( + to_type, + DataType::Boolean + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::Float32 + | DataType::Float64 + ), + DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => matches!( + to_type, + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::Float32 + | DataType::Float64 + | DataType::Decimal128(_, _) + | DataType::Decimal256(_, _) + | DataType::Utf8 // note that there can be formatting differences + ), + DataType::Utf8 if allow_incompat => matches!( + to_type, + DataType::Binary | DataType::Float32 | DataType::Float64 | DataType::Decimal128(_, _) + ), + DataType::Utf8 => matches!(to_type, DataType::Binary), + DataType::Date32 => matches!(to_type, DataType::Utf8), + DataType::Timestamp(_, _) => { + matches!( + to_type, + DataType::Int64 | DataType::Date32 | DataType::Utf8 | DataType::Timestamp(_, _) + ) + } + DataType::Binary => { + // note that this is not completely Spark compatible because + // DataFusion only supports binary data containing valid UTF-8 strings + matches!(to_type, DataType::Utf8) + } + _ => false, + } +} + +/// Cast between struct types based on logic in +/// `org.apache.spark.sql.catalyst.expressions.Cast#castStruct`. +fn cast_struct_to_struct( + array: &StructArray, + from_type: &DataType, + to_type: &DataType, + parquet_options: &SparkParquetOptions, +) -> DataFusionResult { + match (from_type, to_type) { + (DataType::Struct(from_fields), DataType::Struct(to_fields)) => { + // TODO some of this logic may be specific to converting Parquet to Spark + let mut field_name_to_index_map = HashMap::new(); + for (i, field) in from_fields.iter().enumerate() { + field_name_to_index_map.insert(field.name(), i); + } + assert_eq!(field_name_to_index_map.len(), from_fields.len()); + let mut cast_fields: Vec = Vec::with_capacity(to_fields.len()); + for i in 0..to_fields.len() { + let from_index = field_name_to_index_map[to_fields[i].name()]; + let cast_field = cast_array( + Arc::clone(array.column(from_index)), + to_fields[i].data_type(), + parquet_options, + )?; + cast_fields.push(cast_field); + } + Ok(Arc::new(StructArray::new( + to_fields.clone(), + cast_fields, + array.nulls().cloned(), + ))) + } + _ => unreachable!(), + } +} + +fn casts_struct_to_string( + array: &StructArray, + parquet_options: &SparkParquetOptions, +) -> DataFusionResult { + // cast each field to a string + let string_arrays: Vec = array + .columns() + .iter() + .map(|arr| { + spark_parquet_convert( + ColumnarValue::Array(Arc::clone(arr)), + &DataType::Utf8, + parquet_options, + ) + .and_then(|cv| cv.into_array(arr.len())) + }) + .collect::>>()?; + let string_arrays: Vec<&StringArray> = + string_arrays.iter().map(|arr| arr.as_string()).collect(); + // build the struct string containing entries in the format `"field_name":field_value` + let mut builder = StringBuilder::with_capacity(array.len(), array.len() * 16); + let mut str = String::with_capacity(array.len() * 16); + for row_index in 0..array.len() { + if array.is_null(row_index) { + builder.append_null(); + } else { + str.clear(); + let mut any_fields_written = false; + str.push('{'); + for field in &string_arrays { + if any_fields_written { + str.push_str(", "); + } + if field.is_null(row_index) { + str.push_str("null"); + } else { + str.push_str(field.value(row_index)); + } + any_fields_written = true; + } + str.push('}'); + builder.append_value(&str); + } + } + Ok(Arc::new(builder.finish())) +} + +fn cast_string_to_int( + to_type: &DataType, + array: &ArrayRef, + eval_mode: EvalMode, +) -> SparkResult { + let string_array = array + .as_any() + .downcast_ref::>() + .expect("cast_string_to_int expected a string array"); + + let cast_array: ArrayRef = match to_type { + DataType::Int8 => cast_utf8_to_int!(string_array, eval_mode, Int8Type, cast_string_to_i8)?, + DataType::Int16 => { + cast_utf8_to_int!(string_array, eval_mode, Int16Type, cast_string_to_i16)? + } + DataType::Int32 => { + cast_utf8_to_int!(string_array, eval_mode, Int32Type, cast_string_to_i32)? + } + DataType::Int64 => { + cast_utf8_to_int!(string_array, eval_mode, Int64Type, cast_string_to_i64)? + } + dt => unreachable!( + "{}", + format!("invalid integer type {dt} in cast from string") + ), + }; + Ok(cast_array) +} + +fn cast_string_to_date( + array: &ArrayRef, + to_type: &DataType, + eval_mode: EvalMode, +) -> SparkResult { + let string_array = array + .as_any() + .downcast_ref::>() + .expect("Expected a string array"); + + if to_type != &DataType::Date32 { + unreachable!("Invalid data type {:?} in cast from string", to_type); + } + + let len = string_array.len(); + let mut cast_array = PrimitiveArray::::builder(len); + + for i in 0..len { + let value = if string_array.is_null(i) { + None + } else { + match date_parser(string_array.value(i), eval_mode) { + Ok(Some(cast_value)) => Some(cast_value), + Ok(None) => None, + Err(e) => return Err(e), + } + }; + + match value { + Some(cast_value) => cast_array.append_value(cast_value), + None => cast_array.append_null(), + } + } + + Ok(Arc::new(cast_array.finish()) as ArrayRef) +} + +fn cast_string_to_timestamp( + array: &ArrayRef, + to_type: &DataType, + eval_mode: EvalMode, + timezone_str: &str, +) -> SparkResult { + let string_array = array + .as_any() + .downcast_ref::>() + .expect("Expected a string array"); + + let tz = &timezone::Tz::from_str(timezone_str).unwrap(); + + let cast_array: ArrayRef = match to_type { + DataType::Timestamp(_, _) => { + cast_utf8_to_timestamp!( + string_array, + eval_mode, + TimestampMicrosecondType, + timestamp_parser, + tz + ) + } + _ => unreachable!("Invalid data type {:?} in cast from string", to_type), + }; + Ok(cast_array) +} + +fn cast_float64_to_decimal128( + array: &dyn Array, + precision: u8, + scale: i8, + eval_mode: EvalMode, +) -> SparkResult { + cast_floating_point_to_decimal128::(array, precision, scale, eval_mode) +} + +fn cast_float32_to_decimal128( + array: &dyn Array, + precision: u8, + scale: i8, + eval_mode: EvalMode, +) -> SparkResult { + cast_floating_point_to_decimal128::(array, precision, scale, eval_mode) +} + +fn cast_floating_point_to_decimal128( + array: &dyn Array, + precision: u8, + scale: i8, + eval_mode: EvalMode, +) -> SparkResult +where + ::Native: AsPrimitive, +{ + let input = array.as_any().downcast_ref::>().unwrap(); + let mut cast_array = PrimitiveArray::::builder(input.len()); + + let mul = 10_f64.powi(scale as i32); + + for i in 0..input.len() { + if input.is_null(i) { + cast_array.append_null(); + } else { + let input_value = input.value(i).as_(); + let value = (input_value * mul).round().to_i128(); + + match value { + Some(v) => { + if Decimal128Type::validate_decimal_precision(v, precision).is_err() { + if eval_mode == EvalMode::Ansi { + return Err(SparkError::NumericValueOutOfRange { + value: input_value.to_string(), + precision, + scale, + }); + } else { + cast_array.append_null(); + } + } + cast_array.append_value(v); + } + None => { + if eval_mode == EvalMode::Ansi { + return Err(SparkError::NumericValueOutOfRange { + value: input_value.to_string(), + precision, + scale, + }); + } else { + cast_array.append_null(); + } + } + } + } + } + + let res = Arc::new( + cast_array + .with_precision_and_scale(precision, scale)? + .finish(), + ) as ArrayRef; + Ok(res) +} + +fn spark_cast_float64_to_utf8( + from: &dyn Array, + _eval_mode: EvalMode, +) -> SparkResult +where + OffsetSize: OffsetSizeTrait, +{ + cast_float_to_string!(from, _eval_mode, f64, Float64Array, OffsetSize) +} + +fn spark_cast_float32_to_utf8( + from: &dyn Array, + _eval_mode: EvalMode, +) -> SparkResult +where + OffsetSize: OffsetSizeTrait, +{ + cast_float_to_string!(from, _eval_mode, f32, Float32Array, OffsetSize) +} + +fn spark_cast_int_to_int( + array: &dyn Array, + eval_mode: EvalMode, + from_type: &DataType, + to_type: &DataType, +) -> SparkResult { + match (from_type, to_type) { + (DataType::Int64, DataType::Int32) => cast_int_to_int_macro!( + array, eval_mode, Int64Type, Int32Type, from_type, i32, "BIGINT", "INT" + ), + (DataType::Int64, DataType::Int16) => cast_int_to_int_macro!( + array, eval_mode, Int64Type, Int16Type, from_type, i16, "BIGINT", "SMALLINT" + ), + (DataType::Int64, DataType::Int8) => cast_int_to_int_macro!( + array, eval_mode, Int64Type, Int8Type, from_type, i8, "BIGINT", "TINYINT" + ), + (DataType::Int32, DataType::Int16) => cast_int_to_int_macro!( + array, eval_mode, Int32Type, Int16Type, from_type, i16, "INT", "SMALLINT" + ), + (DataType::Int32, DataType::Int8) => cast_int_to_int_macro!( + array, eval_mode, Int32Type, Int8Type, from_type, i8, "INT", "TINYINT" + ), + (DataType::Int16, DataType::Int8) => cast_int_to_int_macro!( + array, eval_mode, Int16Type, Int8Type, from_type, i8, "SMALLINT", "TINYINT" + ), + _ => unreachable!( + "{}", + format!("invalid integer type {to_type} in cast from {from_type}") + ), + } +} + +fn spark_cast_utf8_to_boolean( + from: &dyn Array, + eval_mode: EvalMode, +) -> SparkResult +where + OffsetSize: OffsetSizeTrait, +{ + let array = from + .as_any() + .downcast_ref::>() + .unwrap(); + + let output_array = array + .iter() + .map(|value| match value { + Some(value) => match value.to_ascii_lowercase().trim() { + "t" | "true" | "y" | "yes" | "1" => Ok(Some(true)), + "f" | "false" | "n" | "no" | "0" => Ok(Some(false)), + _ if eval_mode == EvalMode::Ansi => Err(SparkError::CastInvalidValue { + value: value.to_string(), + from_type: "STRING".to_string(), + to_type: "BOOLEAN".to_string(), + }), + _ => Ok(None), + }, + _ => Ok(None), + }) + .collect::>()?; + + Ok(Arc::new(output_array)) +} + +fn spark_cast_nonintegral_numeric_to_integral( + array: &dyn Array, + eval_mode: EvalMode, + from_type: &DataType, + to_type: &DataType, +) -> SparkResult { + match (from_type, to_type) { + (DataType::Float32, DataType::Int8) => cast_float_to_int16_down!( + array, + eval_mode, + Float32Array, + Int8Array, + f32, + i8, + "FLOAT", + "TINYINT", + "{:e}" + ), + (DataType::Float32, DataType::Int16) => cast_float_to_int16_down!( + array, + eval_mode, + Float32Array, + Int16Array, + f32, + i16, + "FLOAT", + "SMALLINT", + "{:e}" + ), + (DataType::Float32, DataType::Int32) => cast_float_to_int32_up!( + array, + eval_mode, + Float32Array, + Int32Array, + f32, + i32, + "FLOAT", + "INT", + i32::MAX, + "{:e}" + ), + (DataType::Float32, DataType::Int64) => cast_float_to_int32_up!( + array, + eval_mode, + Float32Array, + Int64Array, + f32, + i64, + "FLOAT", + "BIGINT", + i64::MAX, + "{:e}" + ), + (DataType::Float64, DataType::Int8) => cast_float_to_int16_down!( + array, + eval_mode, + Float64Array, + Int8Array, + f64, + i8, + "DOUBLE", + "TINYINT", + "{:e}D" + ), + (DataType::Float64, DataType::Int16) => cast_float_to_int16_down!( + array, + eval_mode, + Float64Array, + Int16Array, + f64, + i16, + "DOUBLE", + "SMALLINT", + "{:e}D" + ), + (DataType::Float64, DataType::Int32) => cast_float_to_int32_up!( + array, + eval_mode, + Float64Array, + Int32Array, + f64, + i32, + "DOUBLE", + "INT", + i32::MAX, + "{:e}D" + ), + (DataType::Float64, DataType::Int64) => cast_float_to_int32_up!( + array, + eval_mode, + Float64Array, + Int64Array, + f64, + i64, + "DOUBLE", + "BIGINT", + i64::MAX, + "{:e}D" + ), + (DataType::Decimal128(precision, scale), DataType::Int8) => { + cast_decimal_to_int16_down!( + array, eval_mode, Int8Array, i8, "TINYINT", precision, *scale + ) + } + (DataType::Decimal128(precision, scale), DataType::Int16) => { + cast_decimal_to_int16_down!( + array, eval_mode, Int16Array, i16, "SMALLINT", precision, *scale + ) + } + (DataType::Decimal128(precision, scale), DataType::Int32) => { + cast_decimal_to_int32_up!( + array, + eval_mode, + Int32Array, + i32, + "INT", + i32::MAX, + *precision, + *scale + ) + } + (DataType::Decimal128(precision, scale), DataType::Int64) => { + cast_decimal_to_int32_up!( + array, + eval_mode, + Int64Array, + i64, + "BIGINT", + i64::MAX, + *precision, + *scale + ) + } + _ => unreachable!( + "{}", + format!("invalid cast from non-integral numeric type: {from_type} to integral numeric type: {to_type}") + ), + } +} + +/// Equivalent to org.apache.spark.unsafe.types.UTF8String.toByte +fn cast_string_to_i8(str: &str, eval_mode: EvalMode) -> SparkResult> { + Ok(cast_string_to_int_with_range_check( + str, + eval_mode, + "TINYINT", + i8::MIN as i32, + i8::MAX as i32, + )? + .map(|v| v as i8)) +} + +/// Equivalent to org.apache.spark.unsafe.types.UTF8String.toShort +fn cast_string_to_i16(str: &str, eval_mode: EvalMode) -> SparkResult> { + Ok(cast_string_to_int_with_range_check( + str, + eval_mode, + "SMALLINT", + i16::MIN as i32, + i16::MAX as i32, + )? + .map(|v| v as i16)) +} + +/// Equivalent to org.apache.spark.unsafe.types.UTF8String.toInt(IntWrapper intWrapper) +fn cast_string_to_i32(str: &str, eval_mode: EvalMode) -> SparkResult> { + do_cast_string_to_int::(str, eval_mode, "INT", i32::MIN) +} + +/// Equivalent to org.apache.spark.unsafe.types.UTF8String.toLong(LongWrapper intWrapper) +fn cast_string_to_i64(str: &str, eval_mode: EvalMode) -> SparkResult> { + do_cast_string_to_int::(str, eval_mode, "BIGINT", i64::MIN) +} + +fn cast_string_to_int_with_range_check( + str: &str, + eval_mode: EvalMode, + type_name: &str, + min: i32, + max: i32, +) -> SparkResult> { + match do_cast_string_to_int(str, eval_mode, type_name, i32::MIN)? { + None => Ok(None), + Some(v) if v >= min && v <= max => Ok(Some(v)), + _ if eval_mode == EvalMode::Ansi => Err(invalid_value(str, "STRING", type_name)), + _ => Ok(None), + } +} + +/// Equivalent to +/// - org.apache.spark.unsafe.types.UTF8String.toInt(IntWrapper intWrapper, boolean allowDecimal) +/// - org.apache.spark.unsafe.types.UTF8String.toLong(LongWrapper longWrapper, boolean allowDecimal) +fn do_cast_string_to_int< + T: Num + PartialOrd + Integer + CheckedSub + CheckedNeg + From + Copy, +>( + str: &str, + eval_mode: EvalMode, + type_name: &str, + min_value: T, +) -> SparkResult> { + let trimmed_str = str.trim(); + if trimmed_str.is_empty() { + return none_or_err(eval_mode, type_name, str); + } + let len = trimmed_str.len(); + let mut result: T = T::zero(); + let mut negative = false; + let radix = T::from(10); + let stop_value = min_value / radix; + let mut parse_sign_and_digits = true; + + for (i, ch) in trimmed_str.char_indices() { + if parse_sign_and_digits { + if i == 0 { + negative = ch == '-'; + let positive = ch == '+'; + if negative || positive { + if i + 1 == len { + // input string is just "+" or "-" + return none_or_err(eval_mode, type_name, str); + } + // consume this char + continue; + } + } + + if ch == '.' { + if eval_mode == EvalMode::Legacy { + // truncate decimal in legacy mode + parse_sign_and_digits = false; + continue; + } else { + return none_or_err(eval_mode, type_name, str); + } + } + + let digit = if ch.is_ascii_digit() { + (ch as u32) - ('0' as u32) + } else { + return none_or_err(eval_mode, type_name, str); + }; + + // We are going to process the new digit and accumulate the result. However, before + // doing this, if the result is already smaller than the + // stopValue(Integer.MIN_VALUE / radix), then result * 10 will definitely be + // smaller than minValue, and we can stop + if result < stop_value { + return none_or_err(eval_mode, type_name, str); + } + + // Since the previous result is greater than or equal to stopValue(Integer.MIN_VALUE / + // radix), we can just use `result > 0` to check overflow. If result + // overflows, we should stop + let v = result * radix; + let digit = (digit as i32).into(); + match v.checked_sub(&digit) { + Some(x) if x <= T::zero() => result = x, + _ => { + return none_or_err(eval_mode, type_name, str); + } + } + } else { + // make sure fractional digits are valid digits but ignore them + if !ch.is_ascii_digit() { + return none_or_err(eval_mode, type_name, str); + } + } + } + + if !negative { + if let Some(neg) = result.checked_neg() { + if neg < T::zero() { + return none_or_err(eval_mode, type_name, str); + } + result = neg; + } else { + return none_or_err(eval_mode, type_name, str); + } + } + + Ok(Some(result)) +} + +/// Either return Ok(None) or Err(SparkError::CastInvalidValue) depending on the evaluation mode +#[inline] +fn none_or_err(eval_mode: EvalMode, type_name: &str, str: &str) -> SparkResult> { + match eval_mode { + EvalMode::Ansi => Err(invalid_value(str, "STRING", type_name)), + _ => Ok(None), + } +} + +#[inline] +fn invalid_value(value: &str, from_type: &str, to_type: &str) -> SparkError { + SparkError::CastInvalidValue { + value: value.to_string(), + from_type: from_type.to_string(), + to_type: to_type.to_string(), + } +} + +#[inline] +fn cast_overflow(value: &str, from_type: &str, to_type: &str) -> SparkError { + SparkError::CastOverFlow { + value: value.to_string(), + from_type: from_type.to_string(), + to_type: to_type.to_string(), + } +} + +fn timestamp_parser( + value: &str, + eval_mode: EvalMode, + tz: &T, +) -> SparkResult> { + let value = value.trim(); + if value.is_empty() { + return Ok(None); + } + // Define regex patterns and corresponding parsing functions + let patterns = &[ + ( + Regex::new(r"^\d{4,5}$").unwrap(), + parse_str_to_year_timestamp as fn(&str, &T) -> SparkResult>, + ), + ( + Regex::new(r"^\d{4,5}-\d{2}$").unwrap(), + parse_str_to_month_timestamp, + ), + ( + Regex::new(r"^\d{4,5}-\d{2}-\d{2}$").unwrap(), + parse_str_to_day_timestamp, + ), + ( + Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{1,2}$").unwrap(), + parse_str_to_hour_timestamp, + ), + ( + Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{2}:\d{2}$").unwrap(), + parse_str_to_minute_timestamp, + ), + ( + Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$").unwrap(), + parse_str_to_second_timestamp, + ), + ( + Regex::new(r"^\d{4,5}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}$").unwrap(), + parse_str_to_microsecond_timestamp, + ), + ( + Regex::new(r"^T\d{1,2}$").unwrap(), + parse_str_to_time_only_timestamp, + ), + ]; + + let mut timestamp = None; + + // Iterate through patterns and try matching + for (pattern, parse_func) in patterns { + if pattern.is_match(value) { + timestamp = parse_func(value, tz)?; + break; + } + } + + if timestamp.is_none() { + return if eval_mode == EvalMode::Ansi { + Err(SparkError::CastInvalidValue { + value: value.to_string(), + from_type: "STRING".to_string(), + to_type: "TIMESTAMP".to_string(), + }) + } else { + Ok(None) + }; + } + + match timestamp { + Some(ts) => Ok(Some(ts)), + None => Err(SparkError::Internal( + "Failed to parse timestamp".to_string(), + )), + } +} + +fn parse_timestamp_to_micros( + timestamp_info: &TimeStampInfo, + tz: &T, +) -> SparkResult> { + let datetime = tz.with_ymd_and_hms( + timestamp_info.year, + timestamp_info.month, + timestamp_info.day, + timestamp_info.hour, + timestamp_info.minute, + timestamp_info.second, + ); + + // Check if datetime is not None + let tz_datetime = match datetime.single() { + Some(dt) => dt + .with_timezone(tz) + .with_nanosecond(timestamp_info.microsecond * 1000), + None => { + return Err(SparkError::Internal( + "Failed to parse timestamp".to_string(), + )); + } + }; + + let result = match tz_datetime { + Some(dt) => dt.timestamp_micros(), + None => { + return Err(SparkError::Internal( + "Failed to parse timestamp".to_string(), + )); + } + }; + + Ok(Some(result)) +} + +fn get_timestamp_values( + value: &str, + timestamp_type: &str, + tz: &T, +) -> SparkResult> { + let values: Vec<_> = value.split(['T', '-', ':', '.']).collect(); + let year = values[0].parse::().unwrap_or_default(); + let month = values.get(1).map_or(1, |m| m.parse::().unwrap_or(1)); + let day = values.get(2).map_or(1, |d| d.parse::().unwrap_or(1)); + let hour = values.get(3).map_or(0, |h| h.parse::().unwrap_or(0)); + let minute = values.get(4).map_or(0, |m| m.parse::().unwrap_or(0)); + let second = values.get(5).map_or(0, |s| s.parse::().unwrap_or(0)); + let microsecond = values.get(6).map_or(0, |ms| ms.parse::().unwrap_or(0)); + + let mut timestamp_info = TimeStampInfo::default(); + + let timestamp_info = match timestamp_type { + "year" => timestamp_info.with_year(year), + "month" => timestamp_info.with_year(year).with_month(month), + "day" => timestamp_info + .with_year(year) + .with_month(month) + .with_day(day), + "hour" => timestamp_info + .with_year(year) + .with_month(month) + .with_day(day) + .with_hour(hour), + "minute" => timestamp_info + .with_year(year) + .with_month(month) + .with_day(day) + .with_hour(hour) + .with_minute(minute), + "second" => timestamp_info + .with_year(year) + .with_month(month) + .with_day(day) + .with_hour(hour) + .with_minute(minute) + .with_second(second), + "microsecond" => timestamp_info + .with_year(year) + .with_month(month) + .with_day(day) + .with_hour(hour) + .with_minute(minute) + .with_second(second) + .with_microsecond(microsecond), + _ => { + return Err(SparkError::CastInvalidValue { + value: value.to_string(), + from_type: "STRING".to_string(), + to_type: "TIMESTAMP".to_string(), + }) + } + }; + + parse_timestamp_to_micros(timestamp_info, tz) +} + +fn parse_str_to_year_timestamp(value: &str, tz: &T) -> SparkResult> { + get_timestamp_values(value, "year", tz) +} + +fn parse_str_to_month_timestamp(value: &str, tz: &T) -> SparkResult> { + get_timestamp_values(value, "month", tz) +} + +fn parse_str_to_day_timestamp(value: &str, tz: &T) -> SparkResult> { + get_timestamp_values(value, "day", tz) +} + +fn parse_str_to_hour_timestamp(value: &str, tz: &T) -> SparkResult> { + get_timestamp_values(value, "hour", tz) +} + +fn parse_str_to_minute_timestamp(value: &str, tz: &T) -> SparkResult> { + get_timestamp_values(value, "minute", tz) +} + +fn parse_str_to_second_timestamp(value: &str, tz: &T) -> SparkResult> { + get_timestamp_values(value, "second", tz) +} + +fn parse_str_to_microsecond_timestamp( + value: &str, + tz: &T, +) -> SparkResult> { + get_timestamp_values(value, "microsecond", tz) +} + +fn parse_str_to_time_only_timestamp(value: &str, tz: &T) -> SparkResult> { + let values: Vec<&str> = value.split('T').collect(); + let time_values: Vec = values[1] + .split(':') + .map(|v| v.parse::().unwrap_or(0)) + .collect(); + + let datetime = tz.from_utc_datetime(&chrono::Utc::now().naive_utc()); + let timestamp = datetime + .with_timezone(tz) + .with_hour(time_values.first().copied().unwrap_or_default()) + .and_then(|dt| dt.with_minute(*time_values.get(1).unwrap_or(&0))) + .and_then(|dt| dt.with_second(*time_values.get(2).unwrap_or(&0))) + .and_then(|dt| dt.with_nanosecond(*time_values.get(3).unwrap_or(&0) * 1_000)) + .map(|dt| dt.timestamp_micros()) + .unwrap_or_default(); + + Ok(Some(timestamp)) +} + +//a string to date parser - port of spark's SparkDateTimeUtils#stringToDate. +fn date_parser(date_str: &str, eval_mode: EvalMode) -> SparkResult> { + // local functions + fn get_trimmed_start(bytes: &[u8]) -> usize { + let mut start = 0; + while start < bytes.len() && is_whitespace_or_iso_control(bytes[start]) { + start += 1; + } + start + } + + fn get_trimmed_end(start: usize, bytes: &[u8]) -> usize { + let mut end = bytes.len() - 1; + while end > start && is_whitespace_or_iso_control(bytes[end]) { + end -= 1; + } + end + 1 + } + + fn is_whitespace_or_iso_control(byte: u8) -> bool { + byte.is_ascii_whitespace() || byte.is_ascii_control() + } + + fn is_valid_digits(segment: i32, digits: usize) -> bool { + // An integer is able to represent a date within [+-]5 million years. + let max_digits_year = 7; + //year (segment 0) can be between 4 to 7 digits, + //month and day (segment 1 and 2) can be between 1 to 2 digits + (segment == 0 && digits >= 4 && digits <= max_digits_year) + || (segment != 0 && digits > 0 && digits <= 2) + } + + fn return_result(date_str: &str, eval_mode: EvalMode) -> SparkResult> { + if eval_mode == EvalMode::Ansi { + Err(SparkError::CastInvalidValue { + value: date_str.to_string(), + from_type: "STRING".to_string(), + to_type: "DATE".to_string(), + }) + } else { + Ok(None) + } + } + // end local functions + + if date_str.is_empty() { + return return_result(date_str, eval_mode); + } + + //values of date segments year, month and day defaulting to 1 + let mut date_segments = [1, 1, 1]; + let mut sign = 1; + let mut current_segment = 0; + let mut current_segment_value = Wrapping(0); + let mut current_segment_digits = 0; + let bytes = date_str.as_bytes(); + + let mut j = get_trimmed_start(bytes); + let str_end_trimmed = get_trimmed_end(j, bytes); + + if j == str_end_trimmed { + return return_result(date_str, eval_mode); + } + + //assign a sign to the date + if bytes[j] == b'-' || bytes[j] == b'+' { + sign = if bytes[j] == b'-' { -1 } else { 1 }; + j += 1; + } + + //loop to the end of string until we have processed 3 segments, + //exit loop on encountering any space ' ' or 'T' after the 3rd segment + while j < str_end_trimmed && (current_segment < 3 && !(bytes[j] == b' ' || bytes[j] == b'T')) { + let b = bytes[j]; + if current_segment < 2 && b == b'-' { + //check for validity of year and month segments if current byte is separator + if !is_valid_digits(current_segment, current_segment_digits) { + return return_result(date_str, eval_mode); + } + //if valid update corresponding segment with the current segment value. + date_segments[current_segment as usize] = current_segment_value.0; + current_segment_value = Wrapping(0); + current_segment_digits = 0; + current_segment += 1; + } else if !b.is_ascii_digit() { + return return_result(date_str, eval_mode); + } else { + //increment value of current segment by the next digit + let parsed_value = Wrapping((b - b'0') as i32); + current_segment_value = current_segment_value * Wrapping(10) + parsed_value; + current_segment_digits += 1; + } + j += 1; + } + + //check for validity of last segment + if !is_valid_digits(current_segment, current_segment_digits) { + return return_result(date_str, eval_mode); + } + + if current_segment < 2 && j < str_end_trimmed { + // For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed. + return return_result(date_str, eval_mode); + } + + date_segments[current_segment as usize] = current_segment_value.0; + + match NaiveDate::from_ymd_opt( + sign * date_segments[0], + date_segments[1] as u32, + date_segments[2] as u32, + ) { + Some(date) => { + let duration_since_epoch = date + .signed_duration_since(NaiveDateTime::UNIX_EPOCH.date()) + .num_days(); + Ok(Some(duration_since_epoch.to_i32().unwrap())) + } + None => Ok(None), + } +} + +/// This takes for special casting cases of Spark. E.g., Timestamp to Long. +/// This function runs as a post process of the DataFusion cast(). By the time it arrives here, +/// Dictionary arrays are already unpacked by the DataFusion cast() since Spark cannot specify +/// Dictionary as to_type. The from_type is taken before the DataFusion cast() runs in +/// expressions/cast.rs, so it can be still Dictionary. +fn spark_cast_postprocess(array: ArrayRef, from_type: &DataType, to_type: &DataType) -> ArrayRef { + match (from_type, to_type) { + (DataType::Timestamp(_, _), DataType::Int64) => { + // See Spark's `Cast` expression + unary_dyn::<_, Int64Type>(&array, |v| div_floor(v, MICROS_PER_SECOND)).unwrap() + } + (DataType::Dictionary(_, value_type), DataType::Int64) + if matches!(value_type.as_ref(), &DataType::Timestamp(_, _)) => + { + // See Spark's `Cast` expression + unary_dyn::<_, Int64Type>(&array, |v| div_floor(v, MICROS_PER_SECOND)).unwrap() + } + (DataType::Timestamp(_, _), DataType::Utf8) => remove_trailing_zeroes(array), + (DataType::Dictionary(_, value_type), DataType::Utf8) + if matches!(value_type.as_ref(), &DataType::Timestamp(_, _)) => + { + remove_trailing_zeroes(array) + } + _ => array, + } +} + +/// A fork & modified version of Arrow's `unary_dyn` which is being deprecated +fn unary_dyn(array: &ArrayRef, op: F) -> Result +where + T: ArrowPrimitiveType, + F: Fn(T::Native) -> T::Native, +{ + if let Some(d) = array.as_any_dictionary_opt() { + let new_values = unary_dyn::(d.values(), op)?; + return Ok(Arc::new(d.with_values(Arc::new(new_values)))); + } + + match array.as_primitive_opt::() { + Some(a) if PrimitiveArray::::is_compatible(a.data_type()) => { + Ok(Arc::new(unary::( + array.as_any().downcast_ref::>().unwrap(), + op, + ))) + } + _ => Err(ArrowError::NotYetImplemented(format!( + "Cannot perform unary operation of type {} on array of type {}", + T::DATA_TYPE, + array.data_type() + ))), + } +} + +/// Remove any trailing zeroes in the string if they occur after in the fractional seconds, +/// to match Spark behavior +/// example: +/// "1970-01-01 05:29:59.900" => "1970-01-01 05:29:59.9" +/// "1970-01-01 05:29:59.990" => "1970-01-01 05:29:59.99" +/// "1970-01-01 05:29:59.999" => "1970-01-01 05:29:59.999" +/// "1970-01-01 05:30:00" => "1970-01-01 05:30:00" +/// "1970-01-01 05:30:00.001" => "1970-01-01 05:30:00.001" +fn remove_trailing_zeroes(array: ArrayRef) -> ArrayRef { + let string_array = as_generic_string_array::(&array).unwrap(); + let result = string_array + .iter() + .map(|s| s.map(trim_end)) + .collect::>(); + Arc::new(result) as ArrayRef +} + +fn trim_end(s: &str) -> &str { + if s.rfind('.').is_some() { + s.trim_end_matches('0') + } else { + s + } +} + +#[cfg(test)] +mod tests { + use arrow::datatypes::TimestampMicrosecondType; + use arrow_array::StringArray; + use arrow_schema::{Field, Fields, TimeUnit}; + use std::str::FromStr; + + use super::*; + + #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri + fn timestamp_parser_test() { + let tz = &timezone::Tz::from_str("UTC").unwrap(); + // write for all formats + assert_eq!( + timestamp_parser("2020", EvalMode::Legacy, tz).unwrap(), + Some(1577836800000000) // this is in milliseconds + ); + assert_eq!( + timestamp_parser("2020-01", EvalMode::Legacy, tz).unwrap(), + Some(1577836800000000) + ); + assert_eq!( + timestamp_parser("2020-01-01", EvalMode::Legacy, tz).unwrap(), + Some(1577836800000000) + ); + assert_eq!( + timestamp_parser("2020-01-01T12", EvalMode::Legacy, tz).unwrap(), + Some(1577880000000000) + ); + assert_eq!( + timestamp_parser("2020-01-01T12:34", EvalMode::Legacy, tz).unwrap(), + Some(1577882040000000) + ); + assert_eq!( + timestamp_parser("2020-01-01T12:34:56", EvalMode::Legacy, tz).unwrap(), + Some(1577882096000000) + ); + assert_eq!( + timestamp_parser("2020-01-01T12:34:56.123456", EvalMode::Legacy, tz).unwrap(), + Some(1577882096123456) + ); + assert_eq!( + timestamp_parser("0100", EvalMode::Legacy, tz).unwrap(), + Some(-59011459200000000) + ); + assert_eq!( + timestamp_parser("0100-01", EvalMode::Legacy, tz).unwrap(), + Some(-59011459200000000) + ); + assert_eq!( + timestamp_parser("0100-01-01", EvalMode::Legacy, tz).unwrap(), + Some(-59011459200000000) + ); + assert_eq!( + timestamp_parser("0100-01-01T12", EvalMode::Legacy, tz).unwrap(), + Some(-59011416000000000) + ); + assert_eq!( + timestamp_parser("0100-01-01T12:34", EvalMode::Legacy, tz).unwrap(), + Some(-59011413960000000) + ); + assert_eq!( + timestamp_parser("0100-01-01T12:34:56", EvalMode::Legacy, tz).unwrap(), + Some(-59011413904000000) + ); + assert_eq!( + timestamp_parser("0100-01-01T12:34:56.123456", EvalMode::Legacy, tz).unwrap(), + Some(-59011413903876544) + ); + assert_eq!( + timestamp_parser("10000", EvalMode::Legacy, tz).unwrap(), + Some(253402300800000000) + ); + assert_eq!( + timestamp_parser("10000-01", EvalMode::Legacy, tz).unwrap(), + Some(253402300800000000) + ); + assert_eq!( + timestamp_parser("10000-01-01", EvalMode::Legacy, tz).unwrap(), + Some(253402300800000000) + ); + assert_eq!( + timestamp_parser("10000-01-01T12", EvalMode::Legacy, tz).unwrap(), + Some(253402344000000000) + ); + assert_eq!( + timestamp_parser("10000-01-01T12:34", EvalMode::Legacy, tz).unwrap(), + Some(253402346040000000) + ); + assert_eq!( + timestamp_parser("10000-01-01T12:34:56", EvalMode::Legacy, tz).unwrap(), + Some(253402346096000000) + ); + assert_eq!( + timestamp_parser("10000-01-01T12:34:56.123456", EvalMode::Legacy, tz).unwrap(), + Some(253402346096123456) + ); + // assert_eq!( + // timestamp_parser("T2", EvalMode::Legacy).unwrap(), + // Some(1714356000000000) // this value needs to change everyday. + // ); + } + + #[test] + #[cfg_attr(miri, ignore)] // test takes too long with miri + fn test_cast_string_to_timestamp() { + let array: ArrayRef = Arc::new(StringArray::from(vec![ + Some("2020-01-01T12:34:56.123456"), + Some("T2"), + Some("0100-01-01T12:34:56.123456"), + Some("10000-01-01T12:34:56.123456"), + ])); + let tz = &timezone::Tz::from_str("UTC").unwrap(); + + let string_array = array + .as_any() + .downcast_ref::>() + .expect("Expected a string array"); + + let eval_mode = EvalMode::Legacy; + let result = cast_utf8_to_timestamp!( + &string_array, + eval_mode, + TimestampMicrosecondType, + timestamp_parser, + tz + ); + + assert_eq!( + result.data_type(), + &DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())) + ); + assert_eq!(result.len(), 4); + } + + #[test] + fn test_cast_dict_string_to_timestamp() -> DataFusionResult<()> { + // prepare input data + let keys = Int32Array::from(vec![0, 1]); + let values: ArrayRef = Arc::new(StringArray::from(vec![ + Some("2020-01-01T12:34:56.123456"), + Some("T2"), + ])); + let dict_array = Arc::new(DictionaryArray::new(keys, values)); + + let timezone = "UTC".to_string(); + // test casting string dictionary array to timestamp array + let parquet_options = SparkParquetOptions::new(EvalMode::Legacy, &timezone, false); + let result = cast_array( + dict_array, + &DataType::Timestamp(TimeUnit::Microsecond, Some(timezone.clone().into())), + &parquet_options, + )?; + assert_eq!( + *result.data_type(), + DataType::Timestamp(TimeUnit::Microsecond, Some(timezone.into())) + ); + assert_eq!(result.len(), 2); + + Ok(()) + } + + #[test] + fn date_parser_test() { + for date in &[ + "2020", + "2020-01", + "2020-01-01", + "02020-01-01", + "002020-01-01", + "0002020-01-01", + "2020-1-1", + "2020-01-01 ", + "2020-01-01T", + ] { + for eval_mode in &[EvalMode::Legacy, EvalMode::Ansi, EvalMode::Try] { + assert_eq!(date_parser(date, *eval_mode).unwrap(), Some(18262)); + } + } + + //dates in invalid formats + for date in &[ + "abc", + "", + "not_a_date", + "3/", + "3/12", + "3/12/2020", + "3/12/2002 T", + "202", + "2020-010-01", + "2020-10-010", + "2020-10-010T", + "--262143-12-31", + "--262143-12-31 ", + ] { + for eval_mode in &[EvalMode::Legacy, EvalMode::Try] { + assert_eq!(date_parser(date, *eval_mode).unwrap(), None); + } + assert!(date_parser(date, EvalMode::Ansi).is_err()); + } + + for date in &["-3638-5"] { + for eval_mode in &[EvalMode::Legacy, EvalMode::Try, EvalMode::Ansi] { + assert_eq!(date_parser(date, *eval_mode).unwrap(), Some(-2048160)); + } + } + + //Naive Date only supports years 262142 AD to 262143 BC + //returns None for dates out of range supported by Naive Date. + for date in &[ + "-262144-1-1", + "262143-01-1", + "262143-1-1", + "262143-01-1 ", + "262143-01-01T ", + "262143-1-01T 1234", + "-0973250", + ] { + for eval_mode in &[EvalMode::Legacy, EvalMode::Try, EvalMode::Ansi] { + assert_eq!(date_parser(date, *eval_mode).unwrap(), None); + } + } + } + + #[test] + fn test_cast_string_to_date() { + let array: ArrayRef = Arc::new(StringArray::from(vec![ + Some("2020"), + Some("2020-01"), + Some("2020-01-01"), + Some("2020-01-01T"), + ])); + + let result = cast_string_to_date(&array, &DataType::Date32, EvalMode::Legacy).unwrap(); + + let date32_array = result + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(date32_array.len(), 4); + date32_array + .iter() + .for_each(|v| assert_eq!(v.unwrap(), 18262)); + } + + #[test] + fn test_cast_string_array_with_valid_dates() { + let array_with_invalid_date: ArrayRef = Arc::new(StringArray::from(vec![ + Some("-262143-12-31"), + Some("\n -262143-12-31 "), + Some("-262143-12-31T \t\n"), + Some("\n\t-262143-12-31T\r"), + Some("-262143-12-31T 123123123"), + Some("\r\n-262143-12-31T \r123123123"), + Some("\n -262143-12-31T \n\t"), + ])); + + for eval_mode in &[EvalMode::Legacy, EvalMode::Try, EvalMode::Ansi] { + let result = + cast_string_to_date(&array_with_invalid_date, &DataType::Date32, *eval_mode) + .unwrap(); + + let date32_array = result + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(result.len(), 7); + date32_array + .iter() + .for_each(|v| assert_eq!(v.unwrap(), -96464928)); + } + } + + #[test] + fn test_cast_string_array_with_invalid_dates() { + let array_with_invalid_date: ArrayRef = Arc::new(StringArray::from(vec![ + Some("2020"), + Some("2020-01"), + Some("2020-01-01"), + //4 invalid dates + Some("2020-010-01T"), + Some("202"), + Some(" 202 "), + Some("\n 2020-\r8 "), + Some("2020-01-01T"), + // Overflows i32 + Some("-4607172990231812908"), + ])); + + for eval_mode in &[EvalMode::Legacy, EvalMode::Try] { + let result = + cast_string_to_date(&array_with_invalid_date, &DataType::Date32, *eval_mode) + .unwrap(); + + let date32_array = result + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!( + date32_array.iter().collect::>(), + vec![ + Some(18262), + Some(18262), + Some(18262), + None, + None, + None, + None, + Some(18262), + None + ] + ); + } + + let result = + cast_string_to_date(&array_with_invalid_date, &DataType::Date32, EvalMode::Ansi); + match result { + Err(e) => assert!( + e.to_string().contains( + "[CAST_INVALID_INPUT] The value '2020-010-01T' of the type \"STRING\" cannot be cast to \"DATE\" because it is malformed") + ), + _ => panic!("Expected error"), + } + } + + #[test] + fn test_cast_string_as_i8() { + // basic + assert_eq!( + cast_string_to_i8("127", EvalMode::Legacy).unwrap(), + Some(127_i8) + ); + assert_eq!(cast_string_to_i8("128", EvalMode::Legacy).unwrap(), None); + assert!(cast_string_to_i8("128", EvalMode::Ansi).is_err()); + // decimals + assert_eq!( + cast_string_to_i8("0.2", EvalMode::Legacy).unwrap(), + Some(0_i8) + ); + assert_eq!( + cast_string_to_i8(".", EvalMode::Legacy).unwrap(), + Some(0_i8) + ); + // TRY should always return null for decimals + assert_eq!(cast_string_to_i8("0.2", EvalMode::Try).unwrap(), None); + assert_eq!(cast_string_to_i8(".", EvalMode::Try).unwrap(), None); + // ANSI mode should throw error on decimal + assert!(cast_string_to_i8("0.2", EvalMode::Ansi).is_err()); + assert!(cast_string_to_i8(".", EvalMode::Ansi).is_err()); + } + + #[test] + fn test_cast_unsupported_timestamp_to_date() { + // Since datafusion uses chrono::Datetime internally not all dates representable by TimestampMicrosecondType are supported + let timestamps: PrimitiveArray = vec![i64::MAX].into(); + let parquet_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC", false); + let result = cast_array( + Arc::new(timestamps.with_timezone("Europe/Copenhagen")), + &DataType::Date32, + &parquet_options, + ); + assert!(result.is_err()) + } + + #[test] + fn test_cast_invalid_timezone() { + let timestamps: PrimitiveArray = vec![i64::MAX].into(); + let parquet_options = + SparkParquetOptions::new(EvalMode::Legacy, "Not a valid timezone", false); + let result = cast_array( + Arc::new(timestamps.with_timezone("Europe/Copenhagen")), + &DataType::Date32, + &parquet_options, + ); + assert!(result.is_err()) + } + + #[test] + fn test_cast_struct_to_utf8() { + let a: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + None, + Some(4), + Some(5), + ])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let c: ArrayRef = Arc::new(StructArray::from(vec![ + (Arc::new(Field::new("a", DataType::Int32, true)), a), + (Arc::new(Field::new("b", DataType::Utf8, true)), b), + ])); + let string_array = cast_array( + c, + &DataType::Utf8, + &SparkParquetOptions::new(EvalMode::Legacy, "UTC", false), + ) + .unwrap(); + let string_array = string_array.as_string::(); + assert_eq!(5, string_array.len()); + assert_eq!(r#"{1, a}"#, string_array.value(0)); + assert_eq!(r#"{2, b}"#, string_array.value(1)); + assert_eq!(r#"{null, c}"#, string_array.value(2)); + assert_eq!(r#"{4, d}"#, string_array.value(3)); + assert_eq!(r#"{5, e}"#, string_array.value(4)); + } + + #[test] + fn test_cast_struct_to_struct() { + let a: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + None, + Some(4), + Some(5), + ])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let c: ArrayRef = Arc::new(StructArray::from(vec![ + (Arc::new(Field::new("a", DataType::Int32, true)), a), + (Arc::new(Field::new("b", DataType::Utf8, true)), b), + ])); + // change type of "a" from Int32 to Utf8 + let fields = Fields::from(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Utf8, true), + ]); + let cast_array = spark_parquet_convert( + ColumnarValue::Array(c), + &DataType::Struct(fields), + &SparkParquetOptions::new(EvalMode::Legacy, "UTC", false), + ) + .unwrap(); + if let ColumnarValue::Array(cast_array) = cast_array { + assert_eq!(5, cast_array.len()); + let a = cast_array.as_struct().column(0).as_string::(); + assert_eq!("1", a.value(0)); + } else { + unreachable!() + } + } + + #[test] + fn test_cast_struct_to_struct_drop_column() { + let a: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + None, + Some(4), + Some(5), + ])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let c: ArrayRef = Arc::new(StructArray::from(vec![ + (Arc::new(Field::new("a", DataType::Int32, true)), a), + (Arc::new(Field::new("b", DataType::Utf8, true)), b), + ])); + // change type of "a" from Int32 to Utf8 and drop "b" + let fields = Fields::from(vec![Field::new("a", DataType::Utf8, true)]); + let cast_array = spark_parquet_convert( + ColumnarValue::Array(c), + &DataType::Struct(fields), + &SparkParquetOptions::new(EvalMode::Legacy, "UTC", false), + ) + .unwrap(); + if let ColumnarValue::Array(cast_array) = cast_array { + assert_eq!(5, cast_array.len()); + let struct_array = cast_array.as_struct(); + assert_eq!(1, struct_array.columns().len()); + let a = struct_array.column(0).as_string::(); + assert_eq!("1", a.value(0)); + } else { + unreachable!() + } + } +} diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs new file mode 100644 index 0000000000..1eee68c95b --- /dev/null +++ b/native/core/src/parquet/schema_adapter.rs @@ -0,0 +1,626 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Custom schema adapter that uses Spark-compatible conversions + +use crate::parquet::parquet_support::{spark_parquet_convert, SparkParquetOptions}; +use arrow_array::{new_null_array, Array, RecordBatch, RecordBatchOptions}; +use arrow_schema::{DataType, Schema, SchemaRef}; +use datafusion::datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory, SchemaMapper}; +use datafusion_comet_spark_expr::EvalMode; +use datafusion_common::plan_err; +use datafusion_expr::ColumnarValue; +use std::collections::HashMap; +use std::sync::Arc; + +/// An implementation of DataFusion's `SchemaAdapterFactory` that uses a Spark-compatible +/// `cast` implementation. +#[derive(Clone, Debug)] +pub struct SparkSchemaAdapterFactory { + /// Spark cast options + parquet_options: SparkParquetOptions, +} + +impl SparkSchemaAdapterFactory { + pub fn new(options: SparkParquetOptions) -> Self { + Self { + parquet_options: options, + } + } +} + +impl SchemaAdapterFactory for SparkSchemaAdapterFactory { + /// Create a new factory for mapping batches from a file schema to a table + /// schema. + /// + /// This is a convenience for [`DefaultSchemaAdapterFactory::create`] with + /// the same schema for both the projected table schema and the table + /// schema. + fn create( + &self, + required_schema: SchemaRef, + table_schema: SchemaRef, + ) -> Box { + Box::new(SparkSchemaAdapter { + required_schema, + table_schema, + parquet_options: self.parquet_options.clone(), + }) + } +} + +/// This SchemaAdapter requires both the table schema and the projected table +/// schema. See [`SchemaMapping`] for more details +#[derive(Clone, Debug)] +pub struct SparkSchemaAdapter { + /// The schema for the table, projected to include only the fields being output (projected) by the + /// associated ParquetExec + required_schema: SchemaRef, + /// The entire table schema for the table we're using this to adapt. + /// + /// This is used to evaluate any filters pushed down into the scan + /// which may refer to columns that are not referred to anywhere + /// else in the plan. + table_schema: SchemaRef, + /// Spark cast options + parquet_options: SparkParquetOptions, +} + +impl SchemaAdapter for SparkSchemaAdapter { + /// Map a column index in the table schema to a column index in a particular + /// file schema + /// + /// Panics if index is not in range for the table schema + fn map_column_index(&self, index: usize, file_schema: &Schema) -> Option { + let field = self.required_schema.field(index); + Some(file_schema.fields.find(field.name())?.0) + } + + /// Creates a `SchemaMapping` for casting or mapping the columns from the + /// file schema to the table schema. + /// + /// If the provided `file_schema` contains columns of a different type to + /// the expected `table_schema`, the method will attempt to cast the array + /// data from the file schema to the table schema where possible. + /// + /// Returns a [`SchemaMapping`] that can be applied to the output batch + /// along with an ordered list of columns to project from the file + fn map_schema( + &self, + file_schema: &Schema, + ) -> datafusion_common::Result<(Arc, Vec)> { + let mut projection = Vec::with_capacity(file_schema.fields().len()); + let mut field_mappings = vec![None; self.required_schema.fields().len()]; + + for (file_idx, file_field) in file_schema.fields.iter().enumerate() { + if let Some((table_idx, table_field)) = + self.required_schema.fields().find(file_field.name()) + { + if cast_supported( + file_field.data_type(), + table_field.data_type(), + &self.parquet_options, + ) { + field_mappings[table_idx] = Some(projection.len()); + projection.push(file_idx); + } else { + return plan_err!( + "Cannot cast file schema field {} of type {:?} to required schema field of type {:?}", + file_field.name(), + file_field.data_type(), + table_field.data_type() + ); + } + } + } + + Ok(( + Arc::new(SchemaMapping { + required_schema: Arc::::clone(&self.required_schema), + field_mappings, + table_schema: Arc::::clone(&self.table_schema), + parquet_options: self.parquet_options.clone(), + }), + projection, + )) + } +} + +// TODO SchemaMapping is mostly copied from DataFusion but calls spark_cast +// instead of arrow cast - can we reduce the amount of code copied here and make +// the DataFusion version more extensible? + +/// The SchemaMapping struct holds a mapping from the file schema to the table +/// schema and any necessary type conversions. +/// +/// Note, because `map_batch` and `map_partial_batch` functions have different +/// needs, this struct holds two schemas: +/// +/// 1. The projected **table** schema +/// 2. The full table schema +/// +/// [`map_batch`] is used by the ParquetOpener to produce a RecordBatch which +/// has the projected schema, since that's the schema which is supposed to come +/// out of the execution of this query. Thus `map_batch` uses +/// `projected_table_schema` as it can only operate on the projected fields. +/// +/// [`map_partial_batch`] is used to create a RecordBatch with a schema that +/// can be used for Parquet predicate pushdown, meaning that it may contain +/// fields which are not in the projected schema (as the fields that parquet +/// pushdown filters operate can be completely distinct from the fields that are +/// projected (output) out of the ParquetExec). `map_partial_batch` thus uses +/// `table_schema` to create the resulting RecordBatch (as it could be operating +/// on any fields in the schema). +/// +/// [`map_batch`]: Self::map_batch +/// [`map_partial_batch`]: Self::map_partial_batch +#[derive(Debug)] +pub struct SchemaMapping { + /// The schema of the table. This is the expected schema after conversion + /// and it should match the schema of the query result. + required_schema: SchemaRef, + /// Mapping from field index in `projected_table_schema` to index in + /// projected file_schema. + /// + /// They are Options instead of just plain `usize`s because the table could + /// have fields that don't exist in the file. + field_mappings: Vec>, + /// The entire table schema, as opposed to the projected_table_schema (which + /// only contains the columns that we are projecting out of this query). + /// This contains all fields in the table, regardless of if they will be + /// projected out or not. + table_schema: SchemaRef, + /// Spark cast options + parquet_options: SparkParquetOptions, +} + +impl SchemaMapper for SchemaMapping { + /// Adapts a `RecordBatch` to match the `projected_table_schema` using the stored mapping and + /// conversions. The produced RecordBatch has a schema that contains only the projected + /// columns, so if one needs a RecordBatch with a schema that references columns which are not + /// in the projected, it would be better to use `map_partial_batch` + fn map_batch(&self, batch: RecordBatch) -> datafusion_common::Result { + let batch_rows = batch.num_rows(); + let batch_cols = batch.columns().to_vec(); + + let cols = self + .required_schema + // go through each field in the projected schema + .fields() + .iter() + // and zip it with the index that maps fields from the projected table schema to the + // projected file schema in `batch` + .zip(&self.field_mappings) + // and for each one... + .map(|(field, file_idx)| { + file_idx.map_or_else( + // If this field only exists in the table, and not in the file, then we know + // that it's null, so just return that. + || Ok(new_null_array(field.data_type(), batch_rows)), + // However, if it does exist in both, then try to cast it to the correct output + // type + |batch_idx| { + spark_parquet_convert( + ColumnarValue::Array(Arc::clone(&batch_cols[batch_idx])), + field.data_type(), + &self.parquet_options, + )? + .into_array(batch_rows) + }, + ) + }) + .collect::, _>>()?; + + // Necessary to handle empty batches + let options = RecordBatchOptions::new().with_row_count(Some(batch.num_rows())); + + let schema = Arc::::clone(&self.required_schema); + let record_batch = RecordBatch::try_new_with_options(schema, cols, &options)?; + Ok(record_batch) + } + + /// Adapts a [`RecordBatch`]'s schema into one that has all the correct output types and only + /// contains the fields that exist in both the file schema and table schema. + /// + /// Unlike `map_batch` this method also preserves the columns that + /// may not appear in the final output (`projected_table_schema`) but may + /// appear in push down predicates + fn map_partial_batch(&self, batch: RecordBatch) -> datafusion_common::Result { + let batch_cols = batch.columns().to_vec(); + let schema = batch.schema(); + + // for each field in the batch's schema (which is based on a file, not a table)... + let (cols, fields) = schema + .fields() + .iter() + .zip(batch_cols.iter()) + .flat_map(|(field, batch_col)| { + self.table_schema + // try to get the same field from the table schema that we have stored in self + .field_with_name(field.name()) + // and if we don't have it, that's fine, ignore it. This may occur when we've + // created an external table whose fields are a subset of the fields in this + // file, then tried to read data from the file into this table. If that is the + // case here, it's fine to ignore because we don't care about this field + // anyways + .ok() + // but if we do have it, + .map(|table_field| { + // try to cast it into the correct output type. we don't want to ignore this + // error, though, so it's propagated. + spark_parquet_convert( + ColumnarValue::Array(Arc::clone(batch_col)), + table_field.data_type(), + &self.parquet_options, + )? + .into_array(batch_col.len()) + // and if that works, return the field and column. + .map(|new_col| (new_col, table_field.clone())) + }) + }) + .collect::, _>>()? + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(); + + // Necessary to handle empty batches + let options = RecordBatchOptions::new().with_row_count(Some(batch.num_rows())); + + let schema = Arc::new(Schema::new_with_metadata(fields, schema.metadata().clone())); + let record_batch = RecordBatch::try_new_with_options(schema, cols, &options)?; + Ok(record_batch) + } +} + +/// Determine if Comet supports a cast, taking options such as EvalMode and Timezone into account. +fn cast_supported(from_type: &DataType, to_type: &DataType, options: &SparkParquetOptions) -> bool { + use DataType::*; + + let from_type = if let Dictionary(_, dt) = from_type { + dt + } else { + from_type + }; + + let to_type = if let Dictionary(_, dt) = to_type { + dt + } else { + to_type + }; + + if from_type == to_type { + return true; + } + + match (from_type, to_type) { + (Boolean, _) => can_convert_from_boolean(to_type, options), + (UInt8 | UInt16 | UInt32 | UInt64, Int8 | Int16 | Int32 | Int64) + if options.allow_cast_unsigned_ints => + { + true + } + (Int8, _) => can_convert_from_byte(to_type, options), + (Int16, _) => can_convert_from_short(to_type, options), + (Int32, _) => can_convert_from_int(to_type, options), + (Int64, _) => can_convert_from_long(to_type, options), + (Float32, _) => can_convert_from_float(to_type, options), + (Float64, _) => can_convert_from_double(to_type, options), + (Decimal128(p, s), _) => can_convert_from_decimal(p, s, to_type, options), + (Timestamp(_, None), _) => can_convert_from_timestamp_ntz(to_type, options), + (Timestamp(_, Some(_)), _) => can_convert_from_timestamp(to_type, options), + (Utf8 | LargeUtf8, _) => can_convert_from_string(to_type, options), + (_, Utf8 | LargeUtf8) => can_cast_to_string(from_type, options), + (Struct(from_fields), Struct(to_fields)) => { + // TODO some of this logic may be specific to converting Parquet to Spark + let mut field_types = HashMap::new(); + for field in from_fields { + field_types.insert(field.name(), field.data_type()); + } + if field_types.iter().len() != from_fields.len() { + return false; + } + for field in to_fields { + if let Some(from_type) = field_types.get(&field.name()) { + if !cast_supported(from_type, field.data_type(), options) { + return false; + } + } else { + return false; + } + } + true + } + _ => false, + } +} + +fn can_convert_from_string(to_type: &DataType, options: &SparkParquetOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Binary => true, + Float32 | Float64 => { + // https://github.com/apache/datafusion-comet/issues/326 + // Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. + // Does not support ANSI mode. + options.allow_incompat + } + Decimal128(_, _) => { + // https://github.com/apache/datafusion-comet/issues/325 + // Does not support inputs ending with 'd' or 'f'. Does not support 'inf'. + // Does not support ANSI mode. Returns 0.0 instead of null if input contains no digits + + options.allow_incompat + } + Date32 | Date64 => { + // https://github.com/apache/datafusion-comet/issues/327 + // Only supports years between 262143 BC and 262142 AD + options.allow_incompat + } + Timestamp(_, _) if options.eval_mode == EvalMode::Ansi => { + // ANSI mode not supported + false + } + Timestamp(_, Some(tz)) if tz.as_ref() != "UTC" => { + // Cast will use UTC instead of $timeZoneId + options.allow_incompat + } + Timestamp(_, _) => { + // https://github.com/apache/datafusion-comet/issues/328 + // Not all valid formats are supported + options.allow_incompat + } + _ => false, + } +} + +fn can_cast_to_string(from_type: &DataType, options: &SparkParquetOptions) -> bool { + use DataType::*; + match from_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => true, + Float32 | Float64 => { + // There can be differences in precision. + // For example, the input \"1.4E-45\" will produce 1.0E-45 " + + // instead of 1.4E-45")) + true + } + Decimal128(_, _) => { + // https://github.com/apache/datafusion-comet/issues/1068 + // There can be formatting differences in some case due to Spark using + // scientific notation where Comet does not + true + } + Binary => { + // https://github.com/apache/datafusion-comet/issues/377 + // Only works for binary data representing valid UTF-8 strings + options.allow_incompat + } + Struct(fields) => fields + .iter() + .all(|f| can_cast_to_string(f.data_type(), options)), + _ => false, + } +} + +fn can_convert_from_timestamp_ntz(to_type: &DataType, options: &SparkParquetOptions) -> bool { + use DataType::*; + match to_type { + Timestamp(_, _) | Date32 | Date64 | Utf8 => { + // incompatible + options.allow_incompat + } + _ => { + // unsupported + false + } + } +} + +fn can_convert_from_timestamp(to_type: &DataType, _options: &SparkParquetOptions) -> bool { + use DataType::*; + match to_type { + Timestamp(_, _) => true, + Boolean | Int8 | Int16 => { + // https://github.com/apache/datafusion-comet/issues/352 + // this seems like an edge case that isn't important for us to support + false + } + Int64 => { + // https://github.com/apache/datafusion-comet/issues/352 + true + } + Date32 | Date64 | Utf8 | Decimal128(_, _) => true, + _ => { + // unsupported + false + } + } +} + +fn can_convert_from_boolean(to_type: &DataType, _: &SparkParquetOptions) -> bool { + use DataType::*; + matches!(to_type, Int8 | Int16 | Int32 | Int64 | Float32 | Float64) +} + +fn can_convert_from_byte(to_type: &DataType, _: &SparkParquetOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128(_, _) + ) +} + +fn can_convert_from_short(to_type: &DataType, _: &SparkParquetOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Decimal128(_, _) + ) +} + +fn can_convert_from_int(to_type: &DataType, options: &SparkParquetOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | Utf8 => true, + Decimal128(_, _) => { + // incompatible: no overflow check + options.allow_incompat + } + _ => false, + } +} + +fn can_convert_from_long(to_type: &DataType, options: &SparkParquetOptions) -> bool { + use DataType::*; + match to_type { + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => true, + Decimal128(_, _) => { + // incompatible: no overflow check + options.allow_incompat + } + _ => false, + } +} + +fn can_convert_from_float(to_type: &DataType, _: &SparkParquetOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float64 | Decimal128(_, _) + ) +} + +fn can_convert_from_double(to_type: &DataType, _: &SparkParquetOptions) -> bool { + use DataType::*; + matches!( + to_type, + Boolean | Int8 | Int16 | Int32 | Int64 | Float32 | Decimal128(_, _) + ) +} + +fn can_convert_from_decimal( + p1: &u8, + _s1: &i8, + to_type: &DataType, + options: &SparkParquetOptions, +) -> bool { + use DataType::*; + match to_type { + Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => true, + Decimal128(p2, _) => { + if p2 < p1 { + // https://github.com/apache/datafusion/issues/13492 + // Incompatible(Some("Casting to smaller precision is not supported")) + options.allow_incompat + } else { + true + } + } + _ => false, + } +} + +#[cfg(test)] +mod test { + use crate::parquet::parquet_support::SparkParquetOptions; + use crate::parquet::schema_adapter::SparkSchemaAdapterFactory; + use arrow::array::{Int32Array, StringArray}; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow::record_batch::RecordBatch; + use arrow_array::UInt32Array; + use arrow_schema::SchemaRef; + use datafusion::datasource::listing::PartitionedFile; + use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec}; + use datafusion::execution::object_store::ObjectStoreUrl; + use datafusion::execution::TaskContext; + use datafusion::physical_plan::ExecutionPlan; + use datafusion_comet_spark_expr::test_common::file_util::get_temp_filename; + use datafusion_comet_spark_expr::EvalMode; + use datafusion_common::DataFusionError; + use futures::StreamExt; + use parquet::arrow::ArrowWriter; + use std::fs::File; + use std::sync::Arc; + + #[tokio::test] + async fn parquet_roundtrip_int_as_string() -> Result<(), DataFusionError> { + let file_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let ids = Arc::new(Int32Array::from(vec![1, 2, 3])) as Arc; + let names = Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])) + as Arc; + let batch = RecordBatch::try_new(Arc::clone(&file_schema), vec![ids, names])?; + + let required_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("name", DataType::Utf8, false), + ])); + + let _ = roundtrip(&batch, required_schema).await?; + + Ok(()) + } + + #[tokio::test] + async fn parquet_roundtrip_unsigned_int() -> Result<(), DataFusionError> { + let file_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::UInt32, false)])); + + let ids = Arc::new(UInt32Array::from(vec![1, 2, 3])) as Arc; + let batch = RecordBatch::try_new(Arc::clone(&file_schema), vec![ids])?; + + let required_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + + let _ = roundtrip(&batch, required_schema).await?; + + Ok(()) + } + + /// Create a Parquet file containing a single batch and then read the batch back using + /// the specified required_schema. This will cause the SchemaAdapter code to be used. + async fn roundtrip( + batch: &RecordBatch, + required_schema: SchemaRef, + ) -> Result { + let filename = get_temp_filename(); + let filename = filename.as_path().as_os_str().to_str().unwrap().to_string(); + let file = File::create(&filename)?; + let mut writer = ArrowWriter::try_new(file, Arc::clone(&batch.schema()), None)?; + writer.write(batch)?; + writer.close()?; + + let object_store_url = ObjectStoreUrl::local_filesystem(); + let file_scan_config = FileScanConfig::new(object_store_url, required_schema) + .with_file_groups(vec![vec![PartitionedFile::from_path( + filename.to_string(), + )?]]); + + let mut spark_parquet_options = SparkParquetOptions::new(EvalMode::Legacy, "UTC", false); + spark_parquet_options.allow_cast_unsigned_ints = true; + + let parquet_exec = ParquetExec::builder(file_scan_config) + .with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new( + spark_parquet_options, + ))) + .build(); + + let mut stream = parquet_exec + .execute(0, Arc::new(TaskContext::default())) + .unwrap(); + stream.next().await.unwrap() + } +} diff --git a/native/core/src/parquet/util/jni.rs b/native/core/src/parquet/util/jni.rs index b61fbeab32..d9fa49922a 100644 --- a/native/core/src/parquet/util/jni.rs +++ b/native/core/src/parquet/util/jni.rs @@ -24,11 +24,16 @@ use jni::{ JNIEnv, }; +use arrow::error::ArrowError; +use arrow::ipc::reader::StreamReader; +use datafusion_execution::object_store::ObjectStoreUrl; +use object_store::path::Path; use parquet::{ basic::{Encoding, LogicalType, TimeUnit, Type as PhysicalType}, format::{MicroSeconds, MilliSeconds, NanoSeconds}, schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder}, }; +use url::{ParseError, Url}; /// Convert primitives from Spark side into a `ColumnDescriptor`. #[allow(clippy::too_many_arguments)] @@ -198,3 +203,52 @@ fn fix_type_length(t: &PhysicalType, type_length: i32) -> i32 { _ => type_length, } } + +pub fn deserialize_schema(ipc_bytes: &[u8]) -> Result { + let reader = StreamReader::try_new(std::io::Cursor::new(ipc_bytes), None)?; + let schema = reader.schema().as_ref().clone(); + Ok(schema) +} + +// parses the url and returns a tuple of the scheme and object store path +pub fn get_file_path(url_: String) -> Result<(ObjectStoreUrl, Path), ParseError> { + // we define origin of a url as scheme + "://" + authority + ["/" + bucket] + let url = Url::parse(url_.as_ref()).unwrap(); + let mut object_store_origin = url.scheme().to_owned(); + let mut object_store_path = Path::from_url_path(url.path()).unwrap(); + if object_store_origin == "s3a" { + object_store_origin = "s3".to_string(); + object_store_origin.push_str("://"); + object_store_origin.push_str(url.authority()); + object_store_origin.push('/'); + let path_splits = url.path_segments().map(|c| c.collect::>()).unwrap(); + object_store_origin.push_str(path_splits.first().unwrap()); + let new_path = path_splits[1..path_splits.len() - 1].join("/"); + //TODO: (ARROW NATIVE) check the use of unwrap here + object_store_path = Path::from_url_path(new_path.clone().as_str()).unwrap(); + } else { + object_store_origin.push_str("://"); + object_store_origin.push_str(url.authority()); + object_store_origin.push('/'); + } + Ok(( + ObjectStoreUrl::parse(object_store_origin).unwrap(), + object_store_path, + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_file_path() { + let inp = "file:///comet/spark-warehouse/t1/part1=2019-01-01%2011%253A11%253A11/part-00000-84d7ed74-8f28-456c-9270-f45376eea144.c000.snappy.parquet"; + let expected = "comet/spark-warehouse/t1/part1=2019-01-01 11%3A11%3A11/part-00000-84d7ed74-8f28-456c-9270-f45376eea144.c000.snappy.parquet"; + + if let Ok((_obj_store_url, path)) = get_file_path(inp.to_string()) { + let actual = path.to_string(); + assert_eq!(actual, expected); + } + } +} diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto index c6f310d65c..7e5aa975fb 100644 --- a/native/proto/src/proto/operator.proto +++ b/native/proto/src/proto/operator.proto @@ -46,9 +46,30 @@ message Operator { SortMergeJoin sort_merge_join = 108; HashJoin hash_join = 109; Window window = 110; + NativeScan native_scan = 111; } } +message SparkPartitionedFile { + string file_path = 1; + int64 start = 2; + int64 length = 3; + int64 file_size = 4; + repeated spark.spark_expression.Expr partition_values = 5; +} + +// This name and the one above are not great, but they correspond to the (unfortunate) Spark names. +// I prepended "Spark" since I think there's a name collision on the native side, but we can revisit. +message SparkFilePartition { + repeated SparkPartitionedFile partitioned_file = 1; +} + +message SparkStructField { + string name = 1; + spark.spark_expression.DataType data_type = 2; + bool nullable = 3; +} + message Scan { repeated spark.spark_expression.DataType fields = 1; // The source of the scan (e.g. file scan, broadcast exchange, shuffle, etc). This @@ -57,6 +78,21 @@ message Scan { string source = 2; } +message NativeScan { + repeated spark.spark_expression.DataType fields = 1; + // The source of the scan (e.g. file scan, broadcast exchange, shuffle, etc). This + // is purely for informational purposes when viewing native query plans in + // debug mode. + string source = 2; + repeated SparkStructField required_schema = 3; + repeated SparkStructField data_schema = 4; + repeated SparkStructField partition_schema = 5; + repeated spark.spark_expression.Expr data_filters = 6; + repeated SparkFilePartition file_partitions = 7; + repeated int64 projection_vector = 8; + string session_timezone = 9; +} + message Projection { repeated spark.spark_expression.Expr project_list = 1; } diff --git a/native/spark-expr/src/avg.rs b/native/spark-expr/src/avg.rs new file mode 100644 index 0000000000..816440ac9a --- /dev/null +++ b/native/spark-expr/src/avg.rs @@ -0,0 +1,341 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::compute::sum; +use arrow_array::{ + builder::PrimitiveBuilder, + cast::AsArray, + types::{Float64Type, Int64Type}, + Array, ArrayRef, ArrowNumericType, Int64Array, PrimitiveArray, +}; +use arrow_schema::{DataType, Field}; +use datafusion::logical_expr::{ + type_coercion::aggregates::avg_return_type, Accumulator, EmitTo, GroupsAccumulator, Signature, +}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; +use datafusion_physical_expr::expressions::format_state_name; +use std::{any::Any, sync::Arc}; + +use arrow_array::ArrowNativeTypeOp; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::Volatility::Immutable; +use datafusion_expr::{AggregateUDFImpl, ReversedUDAF}; +use DataType::*; + +/// AVG aggregate expression +#[derive(Debug, Clone)] +pub struct Avg { + name: String, + signature: Signature, + // expr: Arc, + input_data_type: DataType, + result_data_type: DataType, +} + +impl Avg { + /// Create a new AVG aggregate function + pub fn new(name: impl Into, data_type: DataType) -> Self { + let result_data_type = avg_return_type("avg", &data_type).unwrap(); + + Self { + name: name.into(), + signature: Signature::user_defined(Immutable), + input_data_type: data_type, + result_data_type, + } + } +} + +impl AggregateUDFImpl for Avg { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { + // instantiate specialized accumulator based for the type + match (&self.input_data_type, &self.result_data_type) { + (Float64, Float64) => Ok(Box::::default()), + _ => not_impl_err!( + "AvgAccumulator for ({} --> {})", + self.input_data_type, + self.result_data_type + ), + } + } + + fn state_fields(&self, _args: StateFieldsArgs) -> Result> { + Ok(vec![ + Field::new( + format_state_name(&self.name, "sum"), + self.input_data_type.clone(), + true, + ), + Field::new( + format_state_name(&self.name, "count"), + DataType::Int64, + true, + ), + ]) + } + + fn name(&self) -> &str { + &self.name + } + + fn reverse_expr(&self) -> ReversedUDAF { + ReversedUDAF::Identical + } + + fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool { + true + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + // instantiate specialized accumulator based for the type + match (&self.input_data_type, &self.result_data_type) { + (Float64, Float64) => Ok(Box::new(AvgGroupsAccumulator::::new( + &self.input_data_type, + |sum: f64, count: i64| Ok(sum / count as f64), + ))), + + _ => not_impl_err!( + "AvgGroupsAccumulator for ({} --> {})", + self.input_data_type, + self.result_data_type + ), + } + } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + avg_return_type(self.name(), &arg_types[0]) + } +} + +/// An accumulator to compute the average +#[derive(Debug, Default)] +pub struct AvgAccumulator { + sum: Option, + count: i64, +} + +impl Accumulator for AvgAccumulator { + fn state(&mut self) -> Result> { + Ok(vec![ + ScalarValue::Float64(self.sum), + ScalarValue::from(self.count), + ]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + let values = values[0].as_primitive::(); + self.count += (values.len() - values.null_count()) as i64; + let v = self.sum.get_or_insert(0.); + if let Some(x) = sum(values) { + *v += x; + } + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + // counts are summed + self.count += sum(states[1].as_primitive::()).unwrap_or_default(); + + // sums are summed + if let Some(x) = sum(states[0].as_primitive::()) { + let v = self.sum.get_or_insert(0.); + *v += x; + } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + if self.count == 0 { + // If all input are nulls, count will be 0 and we will get null after the division. + // This is consistent with Spark Average implementation. + Ok(ScalarValue::Float64(None)) + } else { + Ok(ScalarValue::Float64( + self.sum.map(|f| f / self.count as f64), + )) + } + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + } +} + +/// An accumulator to compute the average of `[PrimitiveArray]`. +/// Stores values as native types, and does overflow checking +/// +/// F: Function that calculates the average value from a sum of +/// T::Native and a total count +#[derive(Debug)] +struct AvgGroupsAccumulator +where + T: ArrowNumericType + Send, + F: Fn(T::Native, i64) -> Result + Send, +{ + /// The type of the returned average + return_data_type: DataType, + + /// Count per group (use i64 to make Int64Array) + counts: Vec, + + /// Sums per group, stored as the native type + sums: Vec, + + /// Function that computes the final average (value / count) + avg_fn: F, +} + +impl AvgGroupsAccumulator +where + T: ArrowNumericType + Send, + F: Fn(T::Native, i64) -> Result + Send, +{ + pub fn new(return_data_type: &DataType, avg_fn: F) -> Self { + Self { + return_data_type: return_data_type.clone(), + counts: vec![], + sums: vec![], + avg_fn, + } + } +} + +impl GroupsAccumulator for AvgGroupsAccumulator +where + T: ArrowNumericType + Send, + F: Fn(T::Native, i64) -> Result + Send, +{ + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + _opt_filter: Option<&arrow_array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "single argument to update_batch"); + let values = values[0].as_primitive::(); + let data = values.values(); + + // increment counts, update sums + self.counts.resize(total_num_groups, 0); + self.sums.resize(total_num_groups, T::default_value()); + + let iter = group_indices.iter().zip(data.iter()); + if values.null_count() == 0 { + for (&group_index, &value) in iter { + let sum = &mut self.sums[group_index]; + *sum = (*sum).add_wrapping(value); + self.counts[group_index] += 1; + } + } else { + for (idx, (&group_index, &value)) in iter.enumerate() { + if values.is_null(idx) { + continue; + } + let sum = &mut self.sums[group_index]; + *sum = (*sum).add_wrapping(value); + + self.counts[group_index] += 1; + } + } + + Ok(()) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + _opt_filter: Option<&arrow_array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 2, "two arguments to merge_batch"); + // first batch is partial sums, second is counts + let partial_sums = values[0].as_primitive::(); + let partial_counts = values[1].as_primitive::(); + // update counts with partial counts + self.counts.resize(total_num_groups, 0); + let iter1 = group_indices.iter().zip(partial_counts.values().iter()); + for (&group_index, &partial_count) in iter1 { + self.counts[group_index] += partial_count; + } + + // update sums + self.sums.resize(total_num_groups, T::default_value()); + let iter2 = group_indices.iter().zip(partial_sums.values().iter()); + for (&group_index, &new_value) in iter2 { + let sum = &mut self.sums[group_index]; + *sum = sum.add_wrapping(new_value); + } + + Ok(()) + } + + fn evaluate(&mut self, emit_to: EmitTo) -> Result { + let counts = emit_to.take_needed(&mut self.counts); + let sums = emit_to.take_needed(&mut self.sums); + let mut builder = PrimitiveBuilder::::with_capacity(sums.len()); + let iter = sums.into_iter().zip(counts); + + for (sum, count) in iter { + if count != 0 { + builder.append_value((self.avg_fn)(sum, count)?) + } else { + builder.append_null(); + } + } + let array: PrimitiveArray = builder.finish(); + + Ok(Arc::new(array)) + } + + // return arrays for sums and counts + fn state(&mut self, emit_to: EmitTo) -> Result> { + let counts = emit_to.take_needed(&mut self.counts); + let counts = Int64Array::new(counts.into(), None); + + let sums = emit_to.take_needed(&mut self.sums); + let sums = PrimitiveArray::::new(sums.into(), None) + .with_data_type(self.return_data_type.clone()); + + Ok(vec![ + Arc::new(sums) as ArrayRef, + Arc::new(counts) as ArrayRef, + ]) + } + + fn size(&self) -> usize { + self.counts.capacity() * std::mem::size_of::() + + self.sums.capacity() * std::mem::size_of::() + } +} diff --git a/native/spark-expr/src/avg_decimal.rs b/native/spark-expr/src/avg_decimal.rs new file mode 100644 index 0000000000..05fc28e583 --- /dev/null +++ b/native/spark-expr/src/avg_decimal.rs @@ -0,0 +1,522 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::{array::BooleanBufferBuilder, buffer::NullBuffer, compute::sum}; +use arrow_array::{ + builder::PrimitiveBuilder, + cast::AsArray, + types::{Decimal128Type, Int64Type}, + Array, ArrayRef, Decimal128Array, Int64Array, PrimitiveArray, +}; +use arrow_schema::{DataType, Field}; +use datafusion::logical_expr::{Accumulator, EmitTo, GroupsAccumulator, Signature}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; +use datafusion_physical_expr::expressions::format_state_name; +use std::{any::Any, sync::Arc}; + +use crate::utils::is_valid_decimal_precision; +use arrow_array::ArrowNativeTypeOp; +use arrow_data::decimal::{MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION}; +use datafusion::logical_expr::Volatility::Immutable; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::avg_return_type; +use datafusion_expr::{AggregateUDFImpl, ReversedUDAF}; +use num::{integer::div_ceil, Integer}; +use DataType::*; + +/// AVG aggregate expression +#[derive(Debug, Clone)] +pub struct AvgDecimal { + signature: Signature, + sum_data_type: DataType, + result_data_type: DataType, +} + +impl AvgDecimal { + /// Create a new AVG aggregate function + pub fn new(result_type: DataType, sum_type: DataType) -> Self { + Self { + signature: Signature::user_defined(Immutable), + result_data_type: result_type, + sum_data_type: sum_type, + } + } +} + +impl AggregateUDFImpl for AvgDecimal { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { + match (&self.sum_data_type, &self.result_data_type) { + (Decimal128(sum_precision, sum_scale), Decimal128(target_precision, target_scale)) => { + Ok(Box::new(AvgDecimalAccumulator::new( + *sum_scale, + *sum_precision, + *target_precision, + *target_scale, + ))) + } + _ => not_impl_err!( + "AvgDecimalAccumulator for ({} --> {})", + self.sum_data_type, + self.result_data_type + ), + } + } + + fn state_fields(&self, _args: StateFieldsArgs) -> Result> { + Ok(vec![ + Field::new( + format_state_name(self.name(), "sum"), + self.sum_data_type.clone(), + true, + ), + Field::new( + format_state_name(self.name(), "count"), + DataType::Int64, + true, + ), + ]) + } + + fn name(&self) -> &str { + "avg" + } + + fn reverse_expr(&self) -> ReversedUDAF { + ReversedUDAF::Identical + } + + fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool { + true + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + // instantiate specialized accumulator based for the type + match (&self.sum_data_type, &self.result_data_type) { + (Decimal128(sum_precision, sum_scale), Decimal128(target_precision, target_scale)) => { + Ok(Box::new(AvgDecimalGroupsAccumulator::new( + &self.result_data_type, + &self.sum_data_type, + *target_precision, + *target_scale, + *sum_precision, + *sum_scale, + ))) + } + _ => not_impl_err!( + "AvgDecimalGroupsAccumulator for ({} --> {})", + self.sum_data_type, + self.result_data_type + ), + } + } + + fn default_value(&self, _data_type: &DataType) -> Result { + match &self.result_data_type { + Decimal128(target_precision, target_scale) => { + Ok(make_decimal128(None, *target_precision, *target_scale)) + } + _ => not_impl_err!( + "The result_data_type of AvgDecimal should be Decimal128 but got{}", + self.result_data_type + ), + } + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + avg_return_type(self.name(), &arg_types[0]) + } +} + +/// An accumulator to compute the average for decimals +#[derive(Debug)] +struct AvgDecimalAccumulator { + sum: Option, + count: i64, + is_empty: bool, + is_not_null: bool, + sum_scale: i8, + sum_precision: u8, + target_precision: u8, + target_scale: i8, +} + +impl AvgDecimalAccumulator { + pub fn new(sum_scale: i8, sum_precision: u8, target_precision: u8, target_scale: i8) -> Self { + Self { + sum: None, + count: 0, + is_empty: true, + is_not_null: true, + sum_scale, + sum_precision, + target_precision, + target_scale, + } + } + + fn update_single(&mut self, values: &Decimal128Array, idx: usize) { + let v = unsafe { values.value_unchecked(idx) }; + let (new_sum, is_overflow) = match self.sum { + Some(sum) => sum.overflowing_add(v), + None => (v, false), + }; + + if is_overflow || !is_valid_decimal_precision(new_sum, self.sum_precision) { + // Overflow: set buffer accumulator to null + self.is_not_null = false; + return; + } + + self.sum = Some(new_sum); + + if let Some(new_count) = self.count.checked_add(1) { + self.count = new_count; + } else { + self.is_not_null = false; + return; + } + + self.is_not_null = true; + } +} + +fn make_decimal128(value: Option, precision: u8, scale: i8) -> ScalarValue { + ScalarValue::Decimal128(value, precision, scale) +} + +impl Accumulator for AvgDecimalAccumulator { + fn state(&mut self) -> Result> { + Ok(vec![ + ScalarValue::Decimal128(self.sum, self.sum_precision, self.sum_scale), + ScalarValue::from(self.count), + ]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + if !self.is_empty && !self.is_not_null { + // This means there's a overflow in decimal, so we will just skip the rest + // of the computation + return Ok(()); + } + + let values = &values[0]; + let data = values.as_primitive::(); + + self.is_empty = self.is_empty && values.len() == values.null_count(); + + if values.null_count() == 0 { + for i in 0..data.len() { + self.update_single(data, i); + } + } else { + for i in 0..data.len() { + if data.is_null(i) { + continue; + } + self.update_single(data, i); + } + } + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + // counts are summed + self.count += sum(states[1].as_primitive::()).unwrap_or_default(); + + // sums are summed + if let Some(x) = sum(states[0].as_primitive::()) { + let v = self.sum.get_or_insert(0); + let (result, overflowed) = v.overflowing_add(x); + if overflowed { + // Set to None if overflow happens + self.sum = None; + } else { + *v = result; + } + } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let scaler = 10_i128.pow(self.target_scale.saturating_sub(self.sum_scale) as u32); + let target_min = MIN_DECIMAL_FOR_EACH_PRECISION[self.target_precision as usize - 1]; + let target_max = MAX_DECIMAL_FOR_EACH_PRECISION[self.target_precision as usize - 1]; + + let result = self + .sum + .map(|v| avg(v, self.count as i128, target_min, target_max, scaler)); + + match result { + Some(value) => Ok(make_decimal128( + value, + self.target_precision, + self.target_scale, + )), + _ => Ok(make_decimal128( + None, + self.target_precision, + self.target_scale, + )), + } + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + } +} + +#[derive(Debug)] +struct AvgDecimalGroupsAccumulator { + /// Tracks if the value is null + is_not_null: BooleanBufferBuilder, + + // Tracks if the value is empty + is_empty: BooleanBufferBuilder, + + /// The type of the avg return type + return_data_type: DataType, + target_precision: u8, + target_scale: i8, + + /// Count per group (use i64 to make Int64Array) + counts: Vec, + + /// Sums per group, stored as i128 + sums: Vec, + + /// The type of the sum + sum_data_type: DataType, + /// This is input_precision + 10 to be consistent with Spark + sum_precision: u8, + sum_scale: i8, +} + +impl AvgDecimalGroupsAccumulator { + pub fn new( + return_data_type: &DataType, + sum_data_type: &DataType, + target_precision: u8, + target_scale: i8, + sum_precision: u8, + sum_scale: i8, + ) -> Self { + Self { + is_not_null: BooleanBufferBuilder::new(0), + is_empty: BooleanBufferBuilder::new(0), + return_data_type: return_data_type.clone(), + target_precision, + target_scale, + sum_data_type: sum_data_type.clone(), + sum_precision, + sum_scale, + counts: vec![], + sums: vec![], + } + } + + fn is_overflow(&self, index: usize) -> bool { + !self.is_empty.get_bit(index) && !self.is_not_null.get_bit(index) + } + + fn update_single(&mut self, group_index: usize, value: i128) { + if self.is_overflow(group_index) { + // This means there's a overflow in decimal, so we will just skip the rest + // of the computation + return; + } + + self.is_empty.set_bit(group_index, false); + let (new_sum, is_overflow) = self.sums[group_index].overflowing_add(value); + self.counts[group_index] += 1; + + if is_overflow || !is_valid_decimal_precision(new_sum, self.sum_precision) { + // Overflow: set buffer accumulator to null + self.is_not_null.set_bit(group_index, false); + return; + } + + self.sums[group_index] = new_sum; + self.is_not_null.set_bit(group_index, true) + } +} + +fn ensure_bit_capacity(builder: &mut BooleanBufferBuilder, capacity: usize) { + if builder.len() < capacity { + let additional = capacity - builder.len(); + builder.append_n(additional, true); + } +} + +impl GroupsAccumulator for AvgDecimalGroupsAccumulator { + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + _opt_filter: Option<&arrow_array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "single argument to update_batch"); + let values = values[0].as_primitive::(); + let data = values.values(); + + // increment counts, update sums + self.counts.resize(total_num_groups, 0); + self.sums.resize(total_num_groups, 0); + ensure_bit_capacity(&mut self.is_empty, total_num_groups); + ensure_bit_capacity(&mut self.is_not_null, total_num_groups); + + let iter = group_indices.iter().zip(data.iter()); + if values.null_count() == 0 { + for (&group_index, &value) in iter { + self.update_single(group_index, value); + } + } else { + for (idx, (&group_index, &value)) in iter.enumerate() { + if values.is_null(idx) { + continue; + } + self.update_single(group_index, value); + } + } + Ok(()) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + _opt_filter: Option<&arrow_array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 2, "two arguments to merge_batch"); + // first batch is partial sums, second is counts + let partial_sums = values[0].as_primitive::(); + let partial_counts = values[1].as_primitive::(); + // update counts with partial counts + self.counts.resize(total_num_groups, 0); + let iter1 = group_indices.iter().zip(partial_counts.values().iter()); + for (&group_index, &partial_count) in iter1 { + self.counts[group_index] += partial_count; + } + + // update sums + self.sums.resize(total_num_groups, 0); + let iter2 = group_indices.iter().zip(partial_sums.values().iter()); + for (&group_index, &new_value) in iter2 { + let sum = &mut self.sums[group_index]; + *sum = sum.add_wrapping(new_value); + } + + Ok(()) + } + + fn evaluate(&mut self, emit_to: EmitTo) -> Result { + let counts = emit_to.take_needed(&mut self.counts); + let sums = emit_to.take_needed(&mut self.sums); + + let mut builder = PrimitiveBuilder::::with_capacity(sums.len()) + .with_data_type(self.return_data_type.clone()); + let iter = sums.into_iter().zip(counts); + + let scaler = 10_i128.pow(self.target_scale.saturating_sub(self.sum_scale) as u32); + let target_min = MIN_DECIMAL_FOR_EACH_PRECISION[self.target_precision as usize - 1]; + let target_max = MAX_DECIMAL_FOR_EACH_PRECISION[self.target_precision as usize - 1]; + + for (sum, count) in iter { + if count != 0 { + match avg(sum, count as i128, target_min, target_max, scaler) { + Some(value) => { + builder.append_value(value); + } + _ => { + builder.append_null(); + } + } + } else { + builder.append_null(); + } + } + let array: PrimitiveArray = builder.finish(); + + Ok(Arc::new(array)) + } + + // return arrays for sums and counts + fn state(&mut self, emit_to: EmitTo) -> Result> { + let nulls = self.is_not_null.finish(); + let nulls = Some(NullBuffer::new(nulls)); + + let counts = emit_to.take_needed(&mut self.counts); + let counts = Int64Array::new(counts.into(), nulls.clone()); + + let sums = emit_to.take_needed(&mut self.sums); + let sums = + Decimal128Array::new(sums.into(), nulls).with_data_type(self.sum_data_type.clone()); + + Ok(vec![ + Arc::new(sums) as ArrayRef, + Arc::new(counts) as ArrayRef, + ]) + } + + fn size(&self) -> usize { + self.counts.capacity() * std::mem::size_of::() + + self.sums.capacity() * std::mem::size_of::() + } +} + +/// Returns the `sum`/`count` as a i128 Decimal128 with +/// target_scale and target_precision and return None if overflows. +/// +/// * sum: The total sum value stored as Decimal128 with sum_scale +/// * count: total count, stored as a i128 (*NOT* a Decimal128 value) +/// * target_min: The minimum output value possible to represent with the target precision +/// * target_max: The maximum output value possible to represent with the target precision +/// * scaler: scale factor for avg +#[inline(always)] +fn avg(sum: i128, count: i128, target_min: i128, target_max: i128, scaler: i128) -> Option { + if let Some(value) = sum.checked_mul(scaler) { + // `sum / count` with ROUND_HALF_UP + let (div, rem) = value.div_rem(&count); + let half = div_ceil(count, 2); + let half_neg = half.neg_wrapping(); + let new_value = match value >= 0 { + true if rem >= half => div.add_wrapping(1), + false if rem <= half_neg => div.sub_wrapping(1), + _ => div, + }; + if new_value >= target_min && new_value <= target_max { + Some(new_value) + } else { + None + } + } else { + None + } +} diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs index 6e0e0915cd..0f59761325 100644 --- a/native/spark-expr/src/conversion_funcs/cast.rs +++ b/native/spark-expr/src/conversion_funcs/cast.rs @@ -37,7 +37,7 @@ use arrow::{ }; use arrow_array::builder::StringBuilder; use arrow_array::{DictionaryArray, StringArray, StructArray}; -use arrow_schema::{DataType, Field, Schema}; +use arrow_schema::{DataType, Schema}; use chrono::{NaiveDate, NaiveDateTime, TimeZone, Timelike}; use datafusion_common::{ cast::as_generic_string_array, internal_err, Result as DataFusionResult, ScalarValue, @@ -49,6 +49,7 @@ use num::{ ToPrimitive, }; use regex::Regex; +use std::collections::HashMap; use std::str::FromStr; use std::{ any::Any, @@ -803,11 +804,15 @@ pub struct SparkCastOptions { pub eval_mode: EvalMode, /// When cast from/to timezone related types, we need timezone, which will be resolved with /// session local timezone by an analyzer in Spark. + // TODO we should change timezone to Tz to avoid repeated parsing pub timezone: String, /// Allow casts that are supported but not guaranteed to be 100% compatible pub allow_incompat: bool, /// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter) pub allow_cast_unsigned_ints: bool, + /// We also use the cast logic for adapting Parquet schemas, so this flag is used + /// for that use case + pub is_adapting_schema: bool, } impl SparkCastOptions { @@ -817,6 +822,7 @@ impl SparkCastOptions { timezone: timezone.to_string(), allow_incompat, allow_cast_unsigned_ints: false, + is_adapting_schema: false, } } @@ -826,6 +832,7 @@ impl SparkCastOptions { timezone: "".to_string(), allow_incompat, allow_cast_unsigned_ints: false, + is_adapting_schema: false, } } } @@ -952,7 +959,9 @@ fn cast_array( { Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?) } - _ if is_datafusion_spark_compatible(from_type, to_type, cast_options.allow_incompat) => { + _ if cast_options.is_adapting_schema + || is_datafusion_spark_compatible(from_type, to_type, cast_options.allow_incompat) => + { // use DataFusion cast only when we know that it is compatible with Spark Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?) } @@ -1058,17 +1067,28 @@ fn cast_struct_to_struct( cast_options: &SparkCastOptions, ) -> DataFusionResult { match (from_type, to_type) { - (DataType::Struct(_), DataType::Struct(to_fields)) => { - let mut cast_fields: Vec<(Arc, ArrayRef)> = Vec::with_capacity(to_fields.len()); + (DataType::Struct(from_fields), DataType::Struct(to_fields)) => { + // TODO some of this logic may be specific to converting Parquet to Spark + let mut field_name_to_index_map = HashMap::new(); + for (i, field) in from_fields.iter().enumerate() { + field_name_to_index_map.insert(field.name(), i); + } + assert_eq!(field_name_to_index_map.len(), from_fields.len()); + let mut cast_fields: Vec = Vec::with_capacity(to_fields.len()); for i in 0..to_fields.len() { + let from_index = field_name_to_index_map[to_fields[i].name()]; let cast_field = cast_array( - Arc::clone(array.column(i)), + Arc::clone(array.column(from_index)), to_fields[i].data_type(), cast_options, )?; - cast_fields.push((Arc::clone(&to_fields[i]), cast_field)); + cast_fields.push(cast_field); } - Ok(Arc::new(StructArray::from(cast_fields))) + Ok(Arc::new(StructArray::new( + to_fields.clone(), + cast_fields, + array.nulls().cloned(), + ))) } _ => unreachable!(), } diff --git a/native/spark-expr/src/covariance.rs b/native/spark-expr/src/covariance.rs new file mode 100644 index 0000000000..fa3563cdea --- /dev/null +++ b/native/spark-expr/src/covariance.rs @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use std::any::Any; + +use arrow::{ + array::{ArrayRef, Float64Array}, + compute::cast, + datatypes::{DataType, Field}, +}; +use datafusion::logical_expr::Accumulator; +use datafusion_common::{ + downcast_value, unwrap_or_internal_err, DataFusionError, Result, ScalarValue, +}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::NUMERICS; +use datafusion_expr::{AggregateUDFImpl, Signature, Volatility}; +use datafusion_physical_expr::expressions::format_state_name; +use datafusion_physical_expr::expressions::StatsType; + +/// COVAR_SAMP and COVAR_POP aggregate expression +/// The implementation mostly is the same as the DataFusion's implementation. The reason +/// we have our own implementation is that DataFusion has UInt64 for state_field count, +/// while Spark has Double for count. +#[derive(Debug, Clone)] +pub struct Covariance { + name: String, + signature: Signature, + stats_type: StatsType, + null_on_divide_by_zero: bool, +} + +impl Covariance { + /// Create a new COVAR aggregate function + pub fn new( + name: impl Into, + data_type: DataType, + stats_type: StatsType, + null_on_divide_by_zero: bool, + ) -> Self { + // the result of covariance just support FLOAT64 data type. + assert!(matches!(data_type, DataType::Float64)); + Self { + name: name.into(), + signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + stats_type, + null_on_divide_by_zero, + } + } +} + +impl AggregateUDFImpl for Covariance { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + &self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Float64) + } + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } + + fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { + Ok(Box::new(CovarianceAccumulator::try_new( + self.stats_type, + self.null_on_divide_by_zero, + )?)) + } + + fn state_fields(&self, _args: StateFieldsArgs) -> Result> { + Ok(vec![ + Field::new( + format_state_name(&self.name, "count"), + DataType::Float64, + true, + ), + Field::new( + format_state_name(&self.name, "mean1"), + DataType::Float64, + true, + ), + Field::new( + format_state_name(&self.name, "mean2"), + DataType::Float64, + true, + ), + Field::new( + format_state_name(&self.name, "algo_const"), + DataType::Float64, + true, + ), + ]) + } +} + +/// An accumulator to compute covariance +#[derive(Debug)] +pub struct CovarianceAccumulator { + algo_const: f64, + mean1: f64, + mean2: f64, + count: f64, + stats_type: StatsType, + null_on_divide_by_zero: bool, +} + +impl CovarianceAccumulator { + /// Creates a new `CovarianceAccumulator` + pub fn try_new(s_type: StatsType, null_on_divide_by_zero: bool) -> Result { + Ok(Self { + algo_const: 0_f64, + mean1: 0_f64, + mean2: 0_f64, + count: 0_f64, + stats_type: s_type, + null_on_divide_by_zero, + }) + } + + pub fn get_count(&self) -> f64 { + self.count + } + + pub fn get_mean1(&self) -> f64 { + self.mean1 + } + + pub fn get_mean2(&self) -> f64 { + self.mean2 + } + + pub fn get_algo_const(&self) -> f64 { + self.algo_const + } +} + +impl Accumulator for CovarianceAccumulator { + fn state(&mut self) -> Result> { + Ok(vec![ + ScalarValue::from(self.count), + ScalarValue::from(self.mean1), + ScalarValue::from(self.mean2), + ScalarValue::from(self.algo_const), + ]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + let values1 = &cast(&values[0], &DataType::Float64)?; + let values2 = &cast(&values[1], &DataType::Float64)?; + + let mut arr1 = downcast_value!(values1, Float64Array).iter().flatten(); + let mut arr2 = downcast_value!(values2, Float64Array).iter().flatten(); + + for i in 0..values1.len() { + let value1 = if values1.is_valid(i) { + arr1.next() + } else { + None + }; + let value2 = if values2.is_valid(i) { + arr2.next() + } else { + None + }; + + if value1.is_none() || value2.is_none() { + continue; + } + + let value1 = unwrap_or_internal_err!(value1); + let value2 = unwrap_or_internal_err!(value2); + let new_count = self.count + 1.0; + let delta1 = value1 - self.mean1; + let new_mean1 = delta1 / new_count + self.mean1; + let delta2 = value2 - self.mean2; + let new_mean2 = delta2 / new_count + self.mean2; + let new_c = delta1 * (value2 - new_mean2) + self.algo_const; + + self.count += 1.0; + self.mean1 = new_mean1; + self.mean2 = new_mean2; + self.algo_const = new_c; + } + + Ok(()) + } + + fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + let values1 = &cast(&values[0], &DataType::Float64)?; + let values2 = &cast(&values[1], &DataType::Float64)?; + let mut arr1 = downcast_value!(values1, Float64Array).iter().flatten(); + let mut arr2 = downcast_value!(values2, Float64Array).iter().flatten(); + + for i in 0..values1.len() { + let value1 = if values1.is_valid(i) { + arr1.next() + } else { + None + }; + let value2 = if values2.is_valid(i) { + arr2.next() + } else { + None + }; + + if value1.is_none() || value2.is_none() { + continue; + } + + let value1 = unwrap_or_internal_err!(value1); + let value2 = unwrap_or_internal_err!(value2); + + let new_count = self.count - 1.0; + let delta1 = self.mean1 - value1; + let new_mean1 = delta1 / new_count + self.mean1; + let delta2 = self.mean2 - value2; + let new_mean2 = delta2 / new_count + self.mean2; + let new_c = self.algo_const - delta1 * (new_mean2 - value2); + + self.count -= 1.0; + self.mean1 = new_mean1; + self.mean2 = new_mean2; + self.algo_const = new_c; + } + + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + let counts = downcast_value!(states[0], Float64Array); + let means1 = downcast_value!(states[1], Float64Array); + let means2 = downcast_value!(states[2], Float64Array); + let cs = downcast_value!(states[3], Float64Array); + + for i in 0..counts.len() { + let c = counts.value(i); + if c == 0.0 { + continue; + } + let new_count = self.count + c; + let new_mean1 = self.mean1 * self.count / new_count + means1.value(i) * c / new_count; + let new_mean2 = self.mean2 * self.count / new_count + means2.value(i) * c / new_count; + let delta1 = self.mean1 - means1.value(i); + let delta2 = self.mean2 - means2.value(i); + let new_c = + self.algo_const + cs.value(i) + delta1 * delta2 * self.count * c / new_count; + + self.count = new_count; + self.mean1 = new_mean1; + self.mean2 = new_mean2; + self.algo_const = new_c; + } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + if self.count == 0.0 { + return Ok(ScalarValue::Float64(None)); + } + + let count = match self.stats_type { + StatsType::Population => self.count, + StatsType::Sample if self.count > 1.0 => self.count - 1.0, + StatsType::Sample => { + // self.count == 1.0 + return if self.null_on_divide_by_zero { + Ok(ScalarValue::Float64(None)) + } else { + Ok(ScalarValue::Float64(Some(f64::NAN))) + }; + } + }; + + Ok(ScalarValue::Float64(Some(self.algo_const / count))) + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + } +} diff --git a/native/spark-expr/src/strings.rs b/native/spark-expr/src/strings.rs new file mode 100644 index 0000000000..c2706b5896 --- /dev/null +++ b/native/spark-expr/src/strings.rs @@ -0,0 +1,290 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![allow(deprecated)] + +use crate::kernels::strings::{string_space, substring}; +use arrow::{ + compute::{ + contains_dyn, contains_utf8_scalar_dyn, ends_with_dyn, ends_with_utf8_scalar_dyn, like_dyn, + like_utf8_scalar_dyn, starts_with_dyn, starts_with_utf8_scalar_dyn, + }, + record_batch::RecordBatch, +}; +use arrow_schema::{DataType, Schema}; +use datafusion::logical_expr::ColumnarValue; +use datafusion_common::{DataFusionError, ScalarValue::Utf8}; +use datafusion_physical_expr::PhysicalExpr; +use std::{ + any::Any, + fmt::{Display, Formatter}, + hash::Hash, + sync::Arc, +}; + +macro_rules! make_predicate_function { + ($name: ident, $kernel: ident, $str_scalar_kernel: ident) => { + #[derive(Debug, Eq)] + pub struct $name { + left: Arc, + right: Arc, + } + + impl $name { + pub fn new(left: Arc, right: Arc) -> Self { + Self { left, right } + } + } + + impl Display for $name { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "$name [left: {}, right: {}]", self.left, self.right) + } + } + + impl Hash for $name { + fn hash(&self, state: &mut H) { + self.left.hash(state); + self.right.hash(state); + } + } + + impl PartialEq for $name { + fn eq(&self, other: &Self) -> bool { + self.left.eq(&other.left) && self.right.eq(&other.right) + } + } + + impl PhysicalExpr for $name { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _: &Schema) -> datafusion_common::Result { + Ok(DataType::Boolean) + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let left_arg = self.left.evaluate(batch)?; + let right_arg = self.right.evaluate(batch)?; + + let array = match (left_arg, right_arg) { + // array (op) scalar + (ColumnarValue::Array(array), ColumnarValue::Scalar(Utf8(Some(string)))) => { + $str_scalar_kernel(&array, string.as_str()) + } + (ColumnarValue::Array(_), ColumnarValue::Scalar(other)) => { + return Err(DataFusionError::Execution(format!( + "Should be String but got: {:?}", + other + ))) + } + // array (op) array + (ColumnarValue::Array(array1), ColumnarValue::Array(array2)) => { + $kernel(&array1, &array2) + } + // scalar (op) scalar should be folded at Spark optimizer + _ => { + return Err(DataFusionError::Execution( + "Predicate on two literals should be folded at Spark".to_string(), + )) + } + }?; + + Ok(ColumnarValue::Array(Arc::new(array))) + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.left, &self.right] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> datafusion_common::Result> { + Ok(Arc::new($name::new( + children[0].clone(), + children[1].clone(), + ))) + } + } + }; +} + +make_predicate_function!(Like, like_dyn, like_utf8_scalar_dyn); + +make_predicate_function!(StartsWith, starts_with_dyn, starts_with_utf8_scalar_dyn); + +make_predicate_function!(EndsWith, ends_with_dyn, ends_with_utf8_scalar_dyn); + +make_predicate_function!(Contains, contains_dyn, contains_utf8_scalar_dyn); + +#[derive(Debug, Eq)] +pub struct SubstringExpr { + pub child: Arc, + pub start: i64, + pub len: u64, +} + +impl Hash for SubstringExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + self.start.hash(state); + self.len.hash(state); + } +} + +impl PartialEq for SubstringExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) && self.start.eq(&other.start) && self.len.eq(&other.len) + } +} +#[derive(Debug, Eq)] +pub struct StringSpaceExpr { + pub child: Arc, +} + +impl Hash for StringSpaceExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + } +} + +impl PartialEq for StringSpaceExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) + } +} + +impl SubstringExpr { + pub fn new(child: Arc, start: i64, len: u64) -> Self { + Self { child, start, len } + } +} + +impl StringSpaceExpr { + pub fn new(child: Arc) -> Self { + Self { child } + } +} + +impl Display for SubstringExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "StringSpace [start: {}, len: {}, child: {}]", + self.start, self.len, self.child + ) + } +} + +impl Display for StringSpaceExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "StringSpace [child: {}] ", self.child) + } +} + +impl PhysicalExpr for SubstringExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + self.child.data_type(input_schema) + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let arg = self.child.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let result = substring(&array, self.start, self.len)?; + + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "Substring(scalar) should be fold in Spark JVM side.".to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> datafusion_common::Result> { + Ok(Arc::new(SubstringExpr::new( + Arc::clone(&children[0]), + self.start, + self.len, + ))) + } +} + +impl PhysicalExpr for StringSpaceExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + match self.child.data_type(input_schema)? { + DataType::Dictionary(key_type, _) => { + Ok(DataType::Dictionary(key_type, Box::new(DataType::Utf8))) + } + _ => Ok(DataType::Utf8), + } + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let arg = self.child.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let result = string_space(&array)?; + + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "StringSpace(scalar) should be fold in Spark JVM side.".to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> datafusion_common::Result> { + Ok(Arc::new(StringSpaceExpr::new(Arc::clone(&children[0])))) + } +} diff --git a/native/spark-expr/src/sum_decimal.rs b/native/spark-expr/src/sum_decimal.rs new file mode 100644 index 0000000000..f3f34d9bfa --- /dev/null +++ b/native/spark-expr/src/sum_decimal.rs @@ -0,0 +1,555 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::utils::{is_valid_decimal_precision, unlikely}; +use arrow::{ + array::BooleanBufferBuilder, + buffer::{BooleanBuffer, NullBuffer}, +}; +use arrow_array::{ + cast::AsArray, types::Decimal128Type, Array, ArrayRef, BooleanArray, Decimal128Array, +}; +use arrow_schema::{DataType, Field}; +use datafusion::logical_expr::{Accumulator, EmitTo, GroupsAccumulator}; +use datafusion_common::{DataFusionError, Result as DFResult, ScalarValue}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::Volatility::Immutable; +use datafusion_expr::{AggregateUDFImpl, ReversedUDAF, Signature}; +use std::{any::Any, ops::BitAnd, sync::Arc}; + +#[derive(Debug)] +pub struct SumDecimal { + /// Aggregate function signature + signature: Signature, + /// The data type of the SUM result. This will always be a decimal type + /// with the same precision and scale as specified in this struct + result_type: DataType, + /// Decimal precision + precision: u8, + /// Decimal scale + scale: i8, +} + +impl SumDecimal { + pub fn try_new(data_type: DataType) -> DFResult { + // The `data_type` is the SUM result type passed from Spark side + let (precision, scale) = match data_type { + DataType::Decimal128(p, s) => (p, s), + _ => { + return Err(DataFusionError::Internal( + "Invalid data type for SumDecimal".into(), + )) + } + }; + Ok(Self { + signature: Signature::user_defined(Immutable), + result_type: data_type, + precision, + scale, + }) + } +} + +impl AggregateUDFImpl for SumDecimal { + fn as_any(&self) -> &dyn Any { + self + } + + fn accumulator(&self, _args: AccumulatorArgs) -> DFResult> { + Ok(Box::new(SumDecimalAccumulator::new( + self.precision, + self.scale, + ))) + } + + fn state_fields(&self, _args: StateFieldsArgs) -> DFResult> { + let fields = vec![ + Field::new(self.name(), self.result_type.clone(), self.is_nullable()), + Field::new("is_empty", DataType::Boolean, false), + ]; + Ok(fields) + } + + fn name(&self) -> &str { + "sum" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> DFResult { + Ok(self.result_type.clone()) + } + + fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool { + true + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> DFResult> { + Ok(Box::new(SumDecimalGroupsAccumulator::new( + self.result_type.clone(), + self.precision, + ))) + } + + fn default_value(&self, _data_type: &DataType) -> DFResult { + ScalarValue::new_primitive::( + None, + &DataType::Decimal128(self.precision, self.scale), + ) + } + + fn reverse_expr(&self) -> ReversedUDAF { + ReversedUDAF::Identical + } + + fn is_nullable(&self) -> bool { + // SumDecimal is always nullable because overflows can cause null values + true + } +} + +#[derive(Debug)] +struct SumDecimalAccumulator { + sum: i128, + is_empty: bool, + is_not_null: bool, + + precision: u8, + scale: i8, +} + +impl SumDecimalAccumulator { + fn new(precision: u8, scale: i8) -> Self { + Self { + sum: 0, + is_empty: true, + is_not_null: true, + precision, + scale, + } + } + + fn update_single(&mut self, values: &Decimal128Array, idx: usize) { + let v = unsafe { values.value_unchecked(idx) }; + let (new_sum, is_overflow) = self.sum.overflowing_add(v); + + if is_overflow || !is_valid_decimal_precision(new_sum, self.precision) { + // Overflow: set buffer accumulator to null + self.is_not_null = false; + return; + } + + self.sum = new_sum; + self.is_not_null = true; + } +} + +impl Accumulator for SumDecimalAccumulator { + fn update_batch(&mut self, values: &[ArrayRef]) -> DFResult<()> { + assert_eq!( + values.len(), + 1, + "Expect only one element in 'values' but found {}", + values.len() + ); + + if !self.is_empty && !self.is_not_null { + // This means there's a overflow in decimal, so we will just skip the rest + // of the computation + return Ok(()); + } + + let values = &values[0]; + let data = values.as_primitive::(); + + self.is_empty = self.is_empty && values.len() == values.null_count(); + + if values.null_count() == 0 { + for i in 0..data.len() { + self.update_single(data, i); + } + } else { + for i in 0..data.len() { + if data.is_null(i) { + continue; + } + self.update_single(data, i); + } + } + + Ok(()) + } + + fn evaluate(&mut self) -> DFResult { + // For each group: + // 1. if `is_empty` is true, it means either there is no value or all values for the group + // are null, in this case we'll return null + // 2. if `is_empty` is false, but `null_state` is true, it means there's an overflow. In + // non-ANSI mode Spark returns null. + if self.is_empty || !self.is_not_null { + ScalarValue::new_primitive::( + None, + &DataType::Decimal128(self.precision, self.scale), + ) + } else { + ScalarValue::try_new_decimal128(self.sum, self.precision, self.scale) + } + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + } + + fn state(&mut self) -> DFResult> { + let sum = if self.is_not_null { + ScalarValue::try_new_decimal128(self.sum, self.precision, self.scale)? + } else { + ScalarValue::new_primitive::( + None, + &DataType::Decimal128(self.precision, self.scale), + )? + }; + Ok(vec![sum, ScalarValue::from(self.is_empty)]) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> DFResult<()> { + assert_eq!( + states.len(), + 2, + "Expect two element in 'states' but found {}", + states.len() + ); + assert_eq!(states[0].len(), 1); + assert_eq!(states[1].len(), 1); + + let that_sum = states[0].as_primitive::(); + let that_is_empty = states[1].as_any().downcast_ref::().unwrap(); + + let this_overflow = !self.is_empty && !self.is_not_null; + let that_overflow = !that_is_empty.value(0) && that_sum.is_null(0); + + self.is_not_null = !this_overflow && !that_overflow; + self.is_empty = self.is_empty && that_is_empty.value(0); + + if self.is_not_null { + self.sum += that_sum.value(0); + } + + Ok(()) + } +} + +struct SumDecimalGroupsAccumulator { + // Whether aggregate buffer for a particular group is null. True indicates it is not null. + is_not_null: BooleanBufferBuilder, + is_empty: BooleanBufferBuilder, + sum: Vec, + result_type: DataType, + precision: u8, +} + +impl SumDecimalGroupsAccumulator { + fn new(result_type: DataType, precision: u8) -> Self { + Self { + is_not_null: BooleanBufferBuilder::new(0), + is_empty: BooleanBufferBuilder::new(0), + sum: Vec::new(), + result_type, + precision, + } + } + + fn is_overflow(&self, index: usize) -> bool { + !self.is_empty.get_bit(index) && !self.is_not_null.get_bit(index) + } + + fn update_single(&mut self, group_index: usize, value: i128) { + if unlikely(self.is_overflow(group_index)) { + // This means there's a overflow in decimal, so we will just skip the rest + // of the computation + return; + } + + self.is_empty.set_bit(group_index, false); + let (new_sum, is_overflow) = self.sum[group_index].overflowing_add(value); + + if is_overflow || !is_valid_decimal_precision(new_sum, self.precision) { + // Overflow: set buffer accumulator to null + self.is_not_null.set_bit(group_index, false); + return; + } + + self.sum[group_index] = new_sum; + self.is_not_null.set_bit(group_index, true) + } +} + +fn ensure_bit_capacity(builder: &mut BooleanBufferBuilder, capacity: usize) { + if builder.len() < capacity { + let additional = capacity - builder.len(); + builder.append_n(additional, true); + } +} + +/// Build a boolean buffer from the state and reset the state, based on the emit_to +/// strategy. +fn build_bool_state(state: &mut BooleanBufferBuilder, emit_to: &EmitTo) -> BooleanBuffer { + let bool_state: BooleanBuffer = state.finish(); + + match emit_to { + EmitTo::All => bool_state, + EmitTo::First(n) => { + // split off the first N values in bool_state + let first_n_bools: BooleanBuffer = bool_state.iter().take(*n).collect(); + // reset the existing seen buffer + for seen in bool_state.iter().skip(*n) { + state.append(seen); + } + first_n_bools + } + } +} + +impl GroupsAccumulator for SumDecimalGroupsAccumulator { + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> DFResult<()> { + assert!(opt_filter.is_none(), "opt_filter is not supported yet"); + assert_eq!(values.len(), 1); + let values = values[0].as_primitive::(); + let data = values.values(); + + // Update size for the accumulate states + self.sum.resize(total_num_groups, 0); + ensure_bit_capacity(&mut self.is_empty, total_num_groups); + ensure_bit_capacity(&mut self.is_not_null, total_num_groups); + + let iter = group_indices.iter().zip(data.iter()); + if values.null_count() == 0 { + for (&group_index, &value) in iter { + self.update_single(group_index, value); + } + } else { + for (idx, (&group_index, &value)) in iter.enumerate() { + if values.is_null(idx) { + continue; + } + self.update_single(group_index, value); + } + } + + Ok(()) + } + + fn evaluate(&mut self, emit_to: EmitTo) -> DFResult { + // For each group: + // 1. if `is_empty` is true, it means either there is no value or all values for the group + // are null, in this case we'll return null + // 2. if `is_empty` is false, but `null_state` is true, it means there's an overflow. In + // non-ANSI mode Spark returns null. + let nulls = build_bool_state(&mut self.is_not_null, &emit_to); + let is_empty = build_bool_state(&mut self.is_empty, &emit_to); + let x = (!&is_empty).bitand(&nulls); + + let result = emit_to.take_needed(&mut self.sum); + let result = Decimal128Array::new(result.into(), Some(NullBuffer::new(x))) + .with_data_type(self.result_type.clone()); + + Ok(Arc::new(result)) + } + + fn state(&mut self, emit_to: EmitTo) -> DFResult> { + let nulls = build_bool_state(&mut self.is_not_null, &emit_to); + let nulls = Some(NullBuffer::new(nulls)); + + let sum = emit_to.take_needed(&mut self.sum); + let sum = Decimal128Array::new(sum.into(), nulls.clone()) + .with_data_type(self.result_type.clone()); + + let is_empty = build_bool_state(&mut self.is_empty, &emit_to); + let is_empty = BooleanArray::new(is_empty, None); + + Ok(vec![ + Arc::new(sum) as ArrayRef, + Arc::new(is_empty) as ArrayRef, + ]) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> DFResult<()> { + assert_eq!( + values.len(), + 2, + "Expected two arrays: 'sum' and 'is_empty', but found {}", + values.len() + ); + assert!(opt_filter.is_none(), "opt_filter is not supported yet"); + + // Make sure we have enough capacity for the additional groups + self.sum.resize(total_num_groups, 0); + ensure_bit_capacity(&mut self.is_empty, total_num_groups); + ensure_bit_capacity(&mut self.is_not_null, total_num_groups); + + let that_sum = &values[0]; + let that_sum = that_sum.as_primitive::(); + let that_is_empty = &values[1]; + let that_is_empty = that_is_empty + .as_any() + .downcast_ref::() + .unwrap(); + + group_indices + .iter() + .enumerate() + .for_each(|(idx, &group_index)| unsafe { + let this_overflow = self.is_overflow(group_index); + let that_is_empty = that_is_empty.value_unchecked(idx); + let that_overflow = !that_is_empty && that_sum.is_null(idx); + let is_overflow = this_overflow || that_overflow; + + // This part follows the logic in Spark: + // `org.apache.spark.sql.catalyst.expressions.aggregate.Sum` + self.is_not_null.set_bit(group_index, !is_overflow); + self.is_empty.set_bit( + group_index, + self.is_empty.get_bit(group_index) && that_is_empty, + ); + if !is_overflow { + // .. otherwise, the sum value for this particular index must not be null, + // and thus we merge both values and update this sum. + self.sum[group_index] += that_sum.value_unchecked(idx); + } + }); + + Ok(()) + } + + fn size(&self) -> usize { + self.sum.capacity() * std::mem::size_of::() + + self.is_empty.capacity() / 8 + + self.is_not_null.capacity() / 8 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::datatypes::*; + use arrow_array::builder::{Decimal128Builder, StringBuilder}; + use arrow_array::RecordBatch; + use datafusion::execution::TaskContext; + use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; + use datafusion::physical_plan::memory::MemoryExec; + use datafusion::physical_plan::ExecutionPlan; + use datafusion_common::Result; + use datafusion_expr::AggregateUDF; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; + use datafusion_physical_expr::expressions::Column; + use datafusion_physical_expr::PhysicalExpr; + use futures::StreamExt; + + #[test] + fn invalid_data_type() { + assert!(SumDecimal::try_new(DataType::Int32).is_err()); + } + + #[tokio::test] + async fn sum_no_overflow() -> Result<()> { + let num_rows = 8192; + let batch = create_record_batch(num_rows); + let mut batches = Vec::new(); + for _ in 0..10 { + batches.push(batch.clone()); + } + let partitions = &[batches]; + let c0: Arc = Arc::new(Column::new("c0", 0)); + let c1: Arc = Arc::new(Column::new("c1", 1)); + + let data_type = DataType::Decimal128(8, 2); + let schema = Arc::clone(&partitions[0][0].schema()); + let scan: Arc = + Arc::new(MemoryExec::try_new(partitions, Arc::clone(&schema), None).unwrap()); + + let aggregate_udf = Arc::new(AggregateUDF::new_from_impl(SumDecimal::try_new( + data_type.clone(), + )?)); + + let aggr_expr = AggregateExprBuilder::new(aggregate_udf, vec![c1]) + .schema(Arc::clone(&schema)) + .alias("sum") + .with_ignore_nulls(false) + .with_distinct(false) + .build()?; + + let aggregate = Arc::new(AggregateExec::try_new( + AggregateMode::Partial, + PhysicalGroupBy::new_single(vec![(c0, "c0".to_string())]), + vec![aggr_expr.into()], + vec![None], // no filter expressions + scan, + Arc::clone(&schema), + )?); + + let mut stream = aggregate + .execute(0, Arc::new(TaskContext::default())) + .unwrap(); + while let Some(batch) = stream.next().await { + let _batch = batch?; + } + + Ok(()) + } + + fn create_record_batch(num_rows: usize) -> RecordBatch { + let mut decimal_builder = Decimal128Builder::with_capacity(num_rows); + let mut string_builder = StringBuilder::with_capacity(num_rows, num_rows * 32); + for i in 0..num_rows { + decimal_builder.append_value(i as i128); + string_builder.append_value(format!("this is string #{}", i % 1024)); + } + let decimal_array = Arc::new(decimal_builder.finish()); + let string_array = Arc::new(string_builder.finish()); + + let mut fields = vec![]; + let mut columns: Vec = vec![]; + + // string column + fields.push(Field::new("c0", DataType::Utf8, false)); + columns.push(string_array); + + // decimal column + fields.push(Field::new("c1", DataType::Decimal128(38, 10), false)); + columns.push(decimal_array); + + let schema = Schema::new(fields); + RecordBatch::try_new(Arc::new(schema), columns).unwrap() + } +} diff --git a/native/spark-expr/src/temporal.rs b/native/spark-expr/src/temporal.rs new file mode 100644 index 0000000000..fb549f9ce8 --- /dev/null +++ b/native/spark-expr/src/temporal.rs @@ -0,0 +1,510 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::utils::array_with_timezone; +use arrow::{ + compute::{date_part, DatePart}, + record_batch::RecordBatch, +}; +use arrow_schema::{DataType, Schema, TimeUnit::Microsecond}; +use datafusion::logical_expr::ColumnarValue; +use datafusion_common::{DataFusionError, ScalarValue::Utf8}; +use datafusion_physical_expr::PhysicalExpr; +use std::hash::Hash; +use std::{ + any::Any, + fmt::{Debug, Display, Formatter}, + sync::Arc, +}; + +use crate::kernels::temporal::{ + date_trunc_array_fmt_dyn, date_trunc_dyn, timestamp_trunc_array_fmt_dyn, timestamp_trunc_dyn, +}; + +#[derive(Debug, Eq)] +pub struct HourExpr { + /// An array with DataType::Timestamp(TimeUnit::Microsecond, None) + child: Arc, + timezone: String, +} + +impl Hash for HourExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + self.timezone.hash(state); + } +} +impl PartialEq for HourExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) && self.timezone.eq(&other.timezone) + } +} + +impl HourExpr { + pub fn new(child: Arc, timezone: String) -> Self { + HourExpr { child, timezone } + } +} + +impl Display for HourExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Hour [timezone:{}, child: {}]", + self.timezone, self.child + ) + } +} + +impl PhysicalExpr for HourExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + match self.child.data_type(input_schema).unwrap() { + DataType::Dictionary(key_type, _) => { + Ok(DataType::Dictionary(key_type, Box::new(DataType::Int32))) + } + _ => Ok(DataType::Int32), + } + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let arg = self.child.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let array = array_with_timezone( + array, + self.timezone.clone(), + Some(&DataType::Timestamp( + Microsecond, + Some(self.timezone.clone().into()), + )), + )?; + let result = date_part(&array, DatePart::Hour)?; + + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "Hour(scalar) should be fold in Spark JVM side.".to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result, DataFusionError> { + Ok(Arc::new(HourExpr::new( + Arc::clone(&children[0]), + self.timezone.clone(), + ))) + } +} + +#[derive(Debug, Eq)] +pub struct MinuteExpr { + /// An array with DataType::Timestamp(TimeUnit::Microsecond, None) + child: Arc, + timezone: String, +} + +impl Hash for MinuteExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + self.timezone.hash(state); + } +} +impl PartialEq for MinuteExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) && self.timezone.eq(&other.timezone) + } +} + +impl MinuteExpr { + pub fn new(child: Arc, timezone: String) -> Self { + MinuteExpr { child, timezone } + } +} + +impl Display for MinuteExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Minute [timezone:{}, child: {}]", + self.timezone, self.child + ) + } +} + +impl PhysicalExpr for MinuteExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + match self.child.data_type(input_schema).unwrap() { + DataType::Dictionary(key_type, _) => { + Ok(DataType::Dictionary(key_type, Box::new(DataType::Int32))) + } + _ => Ok(DataType::Int32), + } + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let arg = self.child.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let array = array_with_timezone( + array, + self.timezone.clone(), + Some(&DataType::Timestamp( + Microsecond, + Some(self.timezone.clone().into()), + )), + )?; + let result = date_part(&array, DatePart::Minute)?; + + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "Minute(scalar) should be fold in Spark JVM side.".to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result, DataFusionError> { + Ok(Arc::new(MinuteExpr::new( + Arc::clone(&children[0]), + self.timezone.clone(), + ))) + } +} + +#[derive(Debug, Eq)] +pub struct SecondExpr { + /// An array with DataType::Timestamp(TimeUnit::Microsecond, None) + child: Arc, + timezone: String, +} + +impl Hash for SecondExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + self.timezone.hash(state); + } +} +impl PartialEq for SecondExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) && self.timezone.eq(&other.timezone) + } +} + +impl SecondExpr { + pub fn new(child: Arc, timezone: String) -> Self { + SecondExpr { child, timezone } + } +} + +impl Display for SecondExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Second (timezone:{}, child: {}]", + self.timezone, self.child + ) + } +} + +impl PhysicalExpr for SecondExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + match self.child.data_type(input_schema).unwrap() { + DataType::Dictionary(key_type, _) => { + Ok(DataType::Dictionary(key_type, Box::new(DataType::Int32))) + } + _ => Ok(DataType::Int32), + } + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let arg = self.child.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let array = array_with_timezone( + array, + self.timezone.clone(), + Some(&DataType::Timestamp( + Microsecond, + Some(self.timezone.clone().into()), + )), + )?; + let result = date_part(&array, DatePart::Second)?; + + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "Second(scalar) should be fold in Spark JVM side.".to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result, DataFusionError> { + Ok(Arc::new(SecondExpr::new( + Arc::clone(&children[0]), + self.timezone.clone(), + ))) + } +} + +#[derive(Debug, Eq)] +pub struct DateTruncExpr { + /// An array with DataType::Date32 + child: Arc, + /// Scalar UTF8 string matching the valid values in Spark SQL: https://spark.apache.org/docs/latest/api/sql/index.html#trunc + format: Arc, +} + +impl Hash for DateTruncExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + self.format.hash(state); + } +} +impl PartialEq for DateTruncExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) && self.format.eq(&other.format) + } +} + +impl DateTruncExpr { + pub fn new(child: Arc, format: Arc) -> Self { + DateTruncExpr { child, format } + } +} + +impl Display for DateTruncExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "DateTrunc [child:{}, format: {}]", + self.child, self.format + ) + } +} + +impl PhysicalExpr for DateTruncExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + self.child.data_type(input_schema) + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let date = self.child.evaluate(batch)?; + let format = self.format.evaluate(batch)?; + match (date, format) { + (ColumnarValue::Array(date), ColumnarValue::Scalar(Utf8(Some(format)))) => { + let result = date_trunc_dyn(&date, format)?; + Ok(ColumnarValue::Array(result)) + } + (ColumnarValue::Array(date), ColumnarValue::Array(formats)) => { + let result = date_trunc_array_fmt_dyn(&date, &formats)?; + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "Invalid input to function DateTrunc. Expected (PrimitiveArray, Scalar) or \ + (PrimitiveArray, StringArray)".to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result, DataFusionError> { + Ok(Arc::new(DateTruncExpr::new( + Arc::clone(&children[0]), + Arc::clone(&self.format), + ))) + } +} + +#[derive(Debug, Eq)] +pub struct TimestampTruncExpr { + /// An array with DataType::Timestamp(TimeUnit::Microsecond, None) + child: Arc, + /// Scalar UTF8 string matching the valid values in Spark SQL: https://spark.apache.org/docs/latest/api/sql/index.html#date_trunc + format: Arc, + /// String containing a timezone name. The name must be found in the standard timezone + /// database (https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). The string is + /// later parsed into a chrono::TimeZone. + /// Timestamp arrays in this implementation are kept in arrays of UTC timestamps (in micros) + /// along with a single value for the associated TimeZone. The timezone offset is applied + /// just before any operations on the timestamp + timezone: String, +} + +impl Hash for TimestampTruncExpr { + fn hash(&self, state: &mut H) { + self.child.hash(state); + self.format.hash(state); + self.timezone.hash(state); + } +} +impl PartialEq for TimestampTruncExpr { + fn eq(&self, other: &Self) -> bool { + self.child.eq(&other.child) + && self.format.eq(&other.format) + && self.timezone.eq(&other.timezone) + } +} + +impl TimestampTruncExpr { + pub fn new( + child: Arc, + format: Arc, + timezone: String, + ) -> Self { + TimestampTruncExpr { + child, + format, + timezone, + } + } +} + +impl Display for TimestampTruncExpr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "TimestampTrunc [child:{}, format:{}, timezone: {}]", + self.child, self.format, self.timezone + ) + } +} + +impl PhysicalExpr for TimestampTruncExpr { + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, input_schema: &Schema) -> datafusion_common::Result { + match self.child.data_type(input_schema)? { + DataType::Dictionary(key_type, _) => Ok(DataType::Dictionary( + key_type, + Box::new(DataType::Timestamp(Microsecond, None)), + )), + _ => Ok(DataType::Timestamp(Microsecond, None)), + } + } + + fn nullable(&self, _: &Schema) -> datafusion_common::Result { + Ok(true) + } + + fn evaluate(&self, batch: &RecordBatch) -> datafusion_common::Result { + let timestamp = self.child.evaluate(batch)?; + let format = self.format.evaluate(batch)?; + let tz = self.timezone.clone(); + match (timestamp, format) { + (ColumnarValue::Array(ts), ColumnarValue::Scalar(Utf8(Some(format)))) => { + let ts = array_with_timezone( + ts, + tz.clone(), + Some(&DataType::Timestamp(Microsecond, Some(tz.into()))), + )?; + let result = timestamp_trunc_dyn(&ts, format)?; + Ok(ColumnarValue::Array(result)) + } + (ColumnarValue::Array(ts), ColumnarValue::Array(formats)) => { + let ts = array_with_timezone( + ts, + tz.clone(), + Some(&DataType::Timestamp(Microsecond, Some(tz.into()))), + )?; + let result = timestamp_trunc_array_fmt_dyn(&ts, &formats)?; + Ok(ColumnarValue::Array(result)) + } + _ => Err(DataFusionError::Execution( + "Invalid input to function TimestampTrunc. \ + Expected (PrimitiveArray, Scalar, String) or \ + (PrimitiveArray, StringArray, String)" + .to_string(), + )), + } + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.child] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result, DataFusionError> { + Ok(Arc::new(TimestampTruncExpr::new( + Arc::clone(&children[0]), + Arc::clone(&self.format), + self.timezone.clone(), + ))) + } +} diff --git a/native/spark-expr/src/utils.rs b/native/spark-expr/src/utils.rs index 18a2314fb1..ed04941bdb 100644 --- a/native/spark-expr/src/utils.rs +++ b/native/spark-expr/src/utils.rs @@ -19,7 +19,7 @@ use arrow_array::{ cast::as_primitive_array, types::{Int32Type, TimestampMicrosecondType}, }; -use arrow_schema::{ArrowError, DataType, DECIMAL128_MAX_PRECISION}; +use arrow_schema::{ArrowError, DataType, TimeUnit, DECIMAL128_MAX_PRECISION}; use std::sync::Arc; use crate::timezone::Tz; @@ -27,6 +27,7 @@ use arrow::{ array::{as_dictionary_array, Array, ArrayRef, PrimitiveArray}, temporal_conversions::as_datetime, }; +use arrow_array::types::TimestampMillisecondType; use arrow_data::decimal::{MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION}; use chrono::{DateTime, Offset, TimeZone}; @@ -71,6 +72,9 @@ pub fn array_with_timezone( Some(DataType::Timestamp(_, Some(_))) => { timestamp_ntz_to_timestamp(array, timezone.as_str(), Some(timezone.as_str())) } + Some(DataType::Timestamp(_, None)) => { + timestamp_ntz_to_timestamp(array, timezone.as_str(), None) + } _ => { // Not supported panic!( @@ -81,7 +85,7 @@ pub fn array_with_timezone( } } } - DataType::Timestamp(_, Some(_)) => { + DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => { assert!(!timezone.is_empty()); let array = as_primitive_array::(&array); let array_with_timezone = array.clone().with_timezone(timezone.clone()); @@ -93,6 +97,18 @@ pub fn array_with_timezone( _ => Ok(array), } } + DataType::Timestamp(TimeUnit::Millisecond, Some(_)) => { + assert!(!timezone.is_empty()); + let array = as_primitive_array::(&array); + let array_with_timezone = array.clone().with_timezone(timezone.clone()); + let array = Arc::new(array_with_timezone) as ArrayRef; + match to_type { + Some(DataType::Utf8) | Some(DataType::Date32) => { + pre_timestamp_cast(array, timezone) + } + _ => Ok(array), + } + } DataType::Dictionary(_, value_type) if matches!(value_type.as_ref(), &DataType::Timestamp(_, _)) => { @@ -128,7 +144,7 @@ fn timestamp_ntz_to_timestamp( ) -> Result { assert!(!tz.is_empty()); match array.data_type() { - DataType::Timestamp(_, None) => { + DataType::Timestamp(TimeUnit::Microsecond, None) => { let array = as_primitive_array::(&array); let tz: Tz = tz.parse()?; let array: PrimitiveArray = array.try_unary(|value| { @@ -147,6 +163,25 @@ fn timestamp_ntz_to_timestamp( }; Ok(Arc::new(array_with_tz)) } + DataType::Timestamp(TimeUnit::Millisecond, None) => { + let array = as_primitive_array::(&array); + let tz: Tz = tz.parse()?; + let array: PrimitiveArray = array.try_unary(|value| { + as_datetime::(value) + .ok_or_else(|| datetime_cast_err(value)) + .map(|local_datetime| { + let datetime: DateTime = + tz.from_local_datetime(&local_datetime).unwrap(); + datetime.timestamp_millis() + }) + })?; + let array_with_tz = if let Some(to_tz) = to_timezone { + array.with_timezone(to_tz) + } else { + array + }; + Ok(Arc::new(array_with_tz)) + } _ => Ok(array), } } diff --git a/native/spark-expr/src/variance.rs b/native/spark-expr/src/variance.rs new file mode 100644 index 0000000000..e71d713f59 --- /dev/null +++ b/native/spark-expr/src/variance.rs @@ -0,0 +1,247 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; + +use arrow::{ + array::{ArrayRef, Float64Array}, + datatypes::{DataType, Field}, +}; +use datafusion::logical_expr::Accumulator; +use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::Volatility::Immutable; +use datafusion_expr::{AggregateUDFImpl, Signature}; +use datafusion_physical_expr::expressions::format_state_name; +use datafusion_physical_expr::expressions::StatsType; + +/// VAR_SAMP and VAR_POP aggregate expression +/// The implementation mostly is the same as the DataFusion's implementation. The reason +/// we have our own implementation is that DataFusion has UInt64 for state_field `count`, +/// while Spark has Double for count. Also we have added `null_on_divide_by_zero` +/// to be consistent with Spark's implementation. +#[derive(Debug)] +pub struct Variance { + name: String, + signature: Signature, + stats_type: StatsType, + null_on_divide_by_zero: bool, +} + +impl Variance { + /// Create a new VARIANCE aggregate function + pub fn new( + name: impl Into, + data_type: DataType, + stats_type: StatsType, + null_on_divide_by_zero: bool, + ) -> Self { + // the result of variance just support FLOAT64 data type. + assert!(matches!(data_type, DataType::Float64)); + Self { + name: name.into(), + signature: Signature::numeric(1, Immutable), + stats_type, + null_on_divide_by_zero, + } + } +} + +impl AggregateUDFImpl for Variance { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + &self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Float64) + } + + fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { + Ok(Box::new(VarianceAccumulator::try_new( + self.stats_type, + self.null_on_divide_by_zero, + )?)) + } + + fn create_sliding_accumulator(&self, _args: AccumulatorArgs) -> Result> { + Ok(Box::new(VarianceAccumulator::try_new( + self.stats_type, + self.null_on_divide_by_zero, + )?)) + } + + fn state_fields(&self, _args: StateFieldsArgs) -> Result> { + Ok(vec![ + Field::new( + format_state_name(&self.name, "count"), + DataType::Float64, + true, + ), + Field::new( + format_state_name(&self.name, "mean"), + DataType::Float64, + true, + ), + Field::new(format_state_name(&self.name, "m2"), DataType::Float64, true), + ]) + } + + fn default_value(&self, _data_type: &DataType) -> Result { + Ok(ScalarValue::Float64(None)) + } +} + +/// An accumulator to compute variance +#[derive(Debug)] +pub struct VarianceAccumulator { + m2: f64, + mean: f64, + count: f64, + stats_type: StatsType, + null_on_divide_by_zero: bool, +} + +impl VarianceAccumulator { + /// Creates a new `VarianceAccumulator` + pub fn try_new(s_type: StatsType, null_on_divide_by_zero: bool) -> Result { + Ok(Self { + m2: 0_f64, + mean: 0_f64, + count: 0_f64, + stats_type: s_type, + null_on_divide_by_zero, + }) + } + + pub fn get_count(&self) -> f64 { + self.count + } + + pub fn get_mean(&self) -> f64 { + self.mean + } + + pub fn get_m2(&self) -> f64 { + self.m2 + } +} + +impl Accumulator for VarianceAccumulator { + fn state(&mut self) -> Result> { + Ok(vec![ + ScalarValue::from(self.count), + ScalarValue::from(self.mean), + ScalarValue::from(self.m2), + ]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + let arr = downcast_value!(&values[0], Float64Array).iter().flatten(); + + for value in arr { + let new_count = self.count + 1.0; + let delta1 = value - self.mean; + let new_mean = delta1 / new_count + self.mean; + let delta2 = value - new_mean; + let new_m2 = self.m2 + delta1 * delta2; + + self.count += 1.0; + self.mean = new_mean; + self.m2 = new_m2; + } + + Ok(()) + } + + fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + let arr = downcast_value!(&values[0], Float64Array).iter().flatten(); + + for value in arr { + let new_count = self.count - 1.0; + let delta1 = self.mean - value; + let new_mean = delta1 / new_count + self.mean; + let delta2 = new_mean - value; + let new_m2 = self.m2 - delta1 * delta2; + + self.count -= 1.0; + self.mean = new_mean; + self.m2 = new_m2; + } + + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + let counts = downcast_value!(states[0], Float64Array); + let means = downcast_value!(states[1], Float64Array); + let m2s = downcast_value!(states[2], Float64Array); + + for i in 0..counts.len() { + let c = counts.value(i); + if c == 0_f64 { + continue; + } + let new_count = self.count + c; + let new_mean = self.mean * self.count / new_count + means.value(i) * c / new_count; + let delta = self.mean - means.value(i); + let new_m2 = self.m2 + m2s.value(i) + delta * delta * self.count * c / new_count; + + self.count = new_count; + self.mean = new_mean; + self.m2 = new_m2; + } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let count = match self.stats_type { + StatsType::Population => self.count, + StatsType::Sample => { + if self.count > 0.0 { + self.count - 1.0 + } else { + self.count + } + } + }; + + Ok(ScalarValue::Float64(match self.count { + count if count == 0.0 => None, + count if count == 1.0 && StatsType::Sample == self.stats_type => { + if self.null_on_divide_by_zero { + None + } else { + Some(f64::NAN) + } + } + _ => Some(self.m2 / count), + })) + } + + fn size(&self) -> usize { + std::mem::size_of_val(self) + } +} diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala index 6066c890d3..7a70181401 100644 --- a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala +++ b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala @@ -70,6 +70,7 @@ class CometExecIterator( id, cometBatchIterators, protobufQueryPlan, + numParts, nativeMetrics, new CometTaskMemoryManager(id), batchSize = COMET_BATCH_SIZE.get(), diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index e786eece22..addf737069 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -53,7 +53,7 @@ import org.apache.spark.sql.types.{DoubleType, FloatType} import org.apache.comet.CometConf._ import org.apache.comet.CometExplainInfo.getActualPlan -import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometNativeShuffleMode, isCometScan, isCometScanEnabled, isCometShuffleEnabled, isSpark34Plus, isSpark40Plus, shouldApplySparkToColumnar, withInfo, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometBroadcastNotEnabledReason, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometNativeShuffleMode, isCometScan, isCometScanEnabled, isCometShuffleEnabled, isOffHeapEnabled, isSpark34Plus, isSpark40Plus, isTesting, shouldApplySparkToColumnar, withInfo, withInfos} import org.apache.comet.parquet.{CometParquetScan, SupportsComet} import org.apache.comet.rules.RewriteJoin import org.apache.comet.serde.OperatorOuterClass.Operator @@ -188,6 +188,29 @@ class CometSparkSessionExtensions scanExec } + // data source V1 + case scanExec @ FileSourceScanExec( + HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _), + _: Seq[_], + requiredSchema, + _, + _, + _, + _, + _, + _) + if CometScanExec.isFileFormatSupported(fileFormat) + && CometNativeScanExec.isSchemaSupported(requiredSchema) + && CometNativeScanExec.isSchemaSupported(partitionSchema) + // TODO we only enable full native scan if COMET_EXEC_ENABLED is enabled + // but this is not really what we want .. we currently insert `CometScanExec` + // here and then it gets replaced with `CometNativeScanExec` in `CometExecRule` + // but that only happens if `COMET_EXEC_ENABLED` is enabled + && COMET_EXEC_ENABLED.get() + && COMET_NATIVE_SCAN_IMPL.get() == CometConf.SCAN_NATIVE_DATAFUSION => + logInfo("Comet extension enabled for v1 full native Scan") + CometScanExec(scanExec, session) + // data source V1 case scanExec @ FileSourceScanExec( HadoopFsRelation(_, partitionSchema, _, _, fileFormat, _), @@ -353,6 +376,13 @@ class CometSparkSessionExtensions } plan.transformUp { + // Fully native scan for V1 + case scan: CometScanExec + if COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_DATAFUSION) => + val nativeOp = QueryPlanSerde.operator2Proto(scan).get + CometNativeScanExec(nativeOp, scan.wrapped, scan.session) + + // Comet JVM + native scan for V1 and V2 case op if isCometScan(op) => val nativeOp = QueryPlanSerde.operator2Proto(op).get CometScanWrapper(nativeOp, op) @@ -944,6 +974,14 @@ class CometSparkSessionExtensions } override def apply(plan: SparkPlan): SparkPlan = { + + // Comet required off-heap memory to be enabled + if (!isOffHeapEnabled(conf) && !isTesting) { + logWarning("Comet native exec disabled because spark.memory.offHeap.enabled=false") + withInfo(plan, "Comet native exec disabled because spark.memory.offHeap.enabled=false") + return plan + } + // DataFusion doesn't have ANSI mode. For now we just disable CometExec if ANSI mode is // enabled. if (isANSIEnabled(conf)) { @@ -1035,12 +1073,20 @@ class CometSparkSessionExtensions var firstNativeOp = true newPlan.transformDown { case op: CometNativeExec => - if (firstNativeOp) { + val newPlan = if (firstNativeOp) { firstNativeOp = false op.convertBlock() } else { op } + + // If reaching leaf node, reset `firstNativeOp` to true + // because it will start a new block in next iteration. + if (op.children.isEmpty) { + firstNativeOp = true + } + + newPlan case op => firstNativeOp = true op @@ -1204,12 +1250,21 @@ object CometSparkSessionExtensions extends Logging { } } + private[comet] def isOffHeapEnabled(conf: SQLConf): Boolean = + conf.getConfString("spark.memory.offHeap.enabled", "false").toBoolean + + // Copied from org.apache.spark.util.Utils which is private to Spark. + private[comet] def isTesting: Boolean = { + System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null + } + // Check whether Comet shuffle is enabled: // 1. `COMET_EXEC_SHUFFLE_ENABLED` is true // 2. `spark.shuffle.manager` is set to `CometShuffleManager` // 3. Off-heap memory is enabled || Spark/Comet unit testing private[comet] def isCometShuffleEnabled(conf: SQLConf): Boolean = - COMET_EXEC_SHUFFLE_ENABLED.get(conf) && isCometShuffleManagerEnabled(conf) + COMET_EXEC_SHUFFLE_ENABLED.get(conf) && isCometShuffleManagerEnabled(conf) && + (isOffHeapEnabled(conf) || isTesting) private[comet] def getCometShuffleNotEnabledReason(conf: SQLConf): Option[String] = { if (!COMET_EXEC_SHUFFLE_ENABLED.get(conf)) { diff --git a/spark/src/main/scala/org/apache/comet/DataTypeSupport.scala b/spark/src/main/scala/org/apache/comet/DataTypeSupport.scala index c49a2c465c..5c765003cf 100644 --- a/spark/src/main/scala/org/apache/comet/DataTypeSupport.scala +++ b/spark/src/main/scala/org/apache/comet/DataTypeSupport.scala @@ -38,7 +38,9 @@ trait DataTypeSupport { case BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | BinaryType | StringType | _: DecimalType | DateType | TimestampType => true - case t: DataType if t.typeName == "timestamp_ntz" => true + case t: DataType if t.typeName == "timestamp_ntz" => + true + true case _ => false } diff --git a/spark/src/main/scala/org/apache/comet/Native.scala b/spark/src/main/scala/org/apache/comet/Native.scala index 7a8c061a25..c293905c96 100644 --- a/spark/src/main/scala/org/apache/comet/Native.scala +++ b/spark/src/main/scala/org/apache/comet/Native.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.comet.CometMetricNode class Native extends NativeBase { + // scalastyle:off /** * Create a native query plan from execution SparkPlan serialized in bytes. * @param id @@ -50,6 +51,7 @@ class Native extends NativeBase { id: Long, iterators: Array[CometBatchIterator], plan: Array[Byte], + partitionCount: Int, metrics: CometMetricNode, taskMemoryManager: CometTaskMemoryManager, batchSize: Int, diff --git a/spark/src/main/scala/org/apache/comet/parquet/CometParquetFileFormat.scala b/spark/src/main/scala/org/apache/comet/parquet/CometParquetFileFormat.scala index 52d8d09a0d..b97aff1b1d 100644 --- a/spark/src/main/scala/org/apache/comet/parquet/CometParquetFileFormat.scala +++ b/spark/src/main/scala/org/apache/comet/parquet/CometParquetFileFormat.scala @@ -100,6 +100,8 @@ class CometParquetFileFormat extends ParquetFileFormat with MetricsSupport with // Comet specific configurations val capacity = CometConf.COMET_BATCH_SIZE.get(sqlConf) + val nativeRecordBatchReaderEnabled = + CometConf.COMET_NATIVE_SCAN_IMPL.get(sqlConf).equals(CometConf.SCAN_NATIVE_ICEBERG_COMPAT) (file: PartitionedFile) => { val sharedConf = broadcastedHadoopConf.value.value @@ -134,22 +136,54 @@ class CometParquetFileFormat extends ParquetFileFormat with MetricsSupport with } pushed.foreach(p => ParquetInputFormat.setFilterPredicate(sharedConf, p)) - val batchReader = new BatchReader( - sharedConf, - file, - footer, - capacity, - requiredSchema, - isCaseSensitive, - useFieldId, - ignoreMissingIds, - datetimeRebaseSpec.mode == CORRECTED, - partitionSchema, - file.partitionValues, - JavaConverters.mapAsJavaMap(metrics)) - val iter = new RecordReaderIterator(batchReader) + val recordBatchReader = + if (nativeRecordBatchReaderEnabled) { + val batchReader = new NativeBatchReader( + sharedConf, + file, + footer, + capacity, + requiredSchema, + isCaseSensitive, + useFieldId, + ignoreMissingIds, + datetimeRebaseSpec.mode == CORRECTED, + partitionSchema, + file.partitionValues, + JavaConverters.mapAsJavaMap(metrics)) + try { + batchReader.init() + } catch { + case e: Throwable => + batchReader.close() + throw e + } + batchReader + } else { + val batchReader = new BatchReader( + sharedConf, + file, + footer, + capacity, + requiredSchema, + isCaseSensitive, + useFieldId, + ignoreMissingIds, + datetimeRebaseSpec.mode == CORRECTED, + partitionSchema, + file.partitionValues, + JavaConverters.mapAsJavaMap(metrics)) + try { + batchReader.init() + } catch { + case e: Throwable => + batchReader.close() + throw e + } + batchReader + } + val iter = new RecordReaderIterator(recordBatchReader) try { - batchReader.init() iter.asInstanceOf[Iterator[InternalRow]] } catch { case e: Throwable => diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index b806b00809..8c37abf3c9 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -29,12 +29,14 @@ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, Normalize import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils -import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometSinkPlaceHolder, CometSparkToColumnarExec, DecimalPrecision} +import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometScanExec, CometSinkPlaceHolder, CometSparkToColumnarExec, DecimalPrecision} import org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec import org.apache.spark.sql.execution import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.{BroadcastQueryStageExec, ShuffleQueryStageExec} import org.apache.spark.sql.execution.aggregate.{BaseAggregateExec, HashAggregateExec, ObjectHashAggregateExec} +import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceRDD, DataSourceRDDPartition} import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, HashJoin, ShuffledHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.window.WindowExec @@ -2540,6 +2542,83 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim childOp.foreach(result.addChildren) op match { + + // Fully native scan for V1 + case scan: CometScanExec + if CometConf.COMET_NATIVE_SCAN_IMPL.get(conf) == CometConf.SCAN_NATIVE_DATAFUSION => + val nativeScanBuilder = OperatorOuterClass.NativeScan.newBuilder() + nativeScanBuilder.setSource(op.simpleStringWithNodeId()) + + val scanTypes = op.output.flatten { attr => + serializeDataType(attr.dataType) + } + + if (scanTypes.length == op.output.length) { + nativeScanBuilder.addAllFields(scanTypes.asJava) + + // Sink operators don't have children + result.clearChildren() + + // TODO remove flatMap and add error handling for unsupported data filters + val dataFilters = scan.dataFilters.flatMap(exprToProto(_, scan.output)) + nativeScanBuilder.addAllDataFilters(dataFilters.asJava) + + // TODO: modify CometNativeScan to generate the file partitions without instantiating RDD. + scan.inputRDD match { + case rdd: DataSourceRDD => + val partitions = rdd.partitions + partitions.foreach(p => { + val inputPartitions = p.asInstanceOf[DataSourceRDDPartition].inputPartitions + inputPartitions.foreach(partition => { + partition2Proto( + partition.asInstanceOf[FilePartition], + nativeScanBuilder, + scan.relation.partitionSchema) + }) + }) + case rdd: FileScanRDD => + rdd.filePartitions.foreach(partition => { + partition2Proto(partition, nativeScanBuilder, scan.relation.partitionSchema) + }) + case _ => + } + + val partitionSchema = schema2Proto(scan.relation.partitionSchema.fields) + val requiredSchema = schema2Proto(scan.requiredSchema.fields) + val dataSchema = schema2Proto(scan.relation.dataSchema.fields) + + val data_schema_idxs = scan.requiredSchema.fields.map(field => { + scan.relation.dataSchema.fieldIndex(field.name) + }) + val partition_schema_idxs = Array + .range( + scan.relation.dataSchema.fields.length, + scan.relation.dataSchema.length + scan.relation.partitionSchema.fields.length) + + val projection_vector = (data_schema_idxs ++ partition_schema_idxs).map(idx => + idx.toLong.asInstanceOf[java.lang.Long]) + + nativeScanBuilder.addAllProjectionVector(projection_vector.toIterable.asJava) + + // In `CometScanRule`, we ensure partitionSchema is supported. + assert(partitionSchema.length == scan.relation.partitionSchema.fields.length) + + nativeScanBuilder.addAllDataSchema(dataSchema.toIterable.asJava) + nativeScanBuilder.addAllRequiredSchema(requiredSchema.toIterable.asJava) + nativeScanBuilder.addAllPartitionSchema(partitionSchema.toIterable.asJava) + nativeScanBuilder.setSessionTimezone(conf.getConfString("spark.sql.session.timeZone")) + + Some(result.setNativeScan(nativeScanBuilder).build()) + + } else { + // There are unsupported scan type + val msg = + s"unsupported Comet operator: ${op.nodeName}, due to unsupported data types above" + emitWarning(msg) + withInfo(op, msg) + None + } + case ProjectExec(projectList, child) if CometConf.COMET_EXEC_PROJECT_ENABLED.get(conf) => val exprs = projectList.map(exprToProto(_, child.output)) @@ -3198,4 +3277,48 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim true } + + private def schema2Proto( + fields: Array[StructField]): Array[OperatorOuterClass.SparkStructField] = { + val fieldBuilder = OperatorOuterClass.SparkStructField.newBuilder() + fields.map(field => { + fieldBuilder.setName(field.name) + fieldBuilder.setDataType(serializeDataType(field.dataType).get) + fieldBuilder.setNullable(field.nullable) + fieldBuilder.build() + }) + } + + private def partition2Proto( + partition: FilePartition, + nativeScanBuilder: OperatorOuterClass.NativeScan.Builder, + partitionSchema: StructType): Unit = { + val partitionBuilder = OperatorOuterClass.SparkFilePartition.newBuilder() + partition.files.foreach(file => { + // Process the partition values + val partitionValues = file.partitionValues + assert(partitionValues.numFields == partitionSchema.length) + val partitionVals = + partitionValues.toSeq(partitionSchema).zipWithIndex.map { case (value, i) => + val attr = partitionSchema(i) + val valueProto = exprToProto(Literal(value, attr.dataType), Seq.empty) + // In `CometScanRule`, we have already checked that all partition values are + // supported. So, we can safely use `get` here. + assert( + valueProto.isDefined, + s"Unsupported partition value: $value, type: ${attr.dataType}") + valueProto.get + } + + val fileBuilder = OperatorOuterClass.SparkPartitionedFile.newBuilder() + partitionVals.foreach(fileBuilder.addPartitionValues) + fileBuilder + .setFilePath(file.filePath.toString) + .setStart(file.start) + .setLength(file.length) + .setFileSize(file.fileSize) + partitionBuilder.addPartitionedFile(fileBuilder.build()) + }) + nativeScanBuilder.addFilePartitions(partitionBuilder.build()) + } } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala new file mode 100644 index 0000000000..2fd7f12c24 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometExecRDD.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.comet + +import org.apache.spark.{Partition, SparkContext, TaskContext} +import org.apache.spark.rdd.{RDD, RDDOperationScope} +import org.apache.spark.sql.vectorized.ColumnarBatch + +/** + * A RDD that executes Spark SQL query in Comet native execution to generate ColumnarBatch. + */ +private[spark] class CometExecRDD( + sc: SparkContext, + partitionNum: Int, + var f: (Seq[Iterator[ColumnarBatch]], Int, Int) => Iterator[ColumnarBatch]) + extends RDD[ColumnarBatch](sc, Nil) { + + override def compute(s: Partition, context: TaskContext): Iterator[ColumnarBatch] = { + f(Seq.empty, partitionNum, s.index) + } + + override protected def getPartitions: Array[Partition] = { + Array.tabulate(partitionNum)(i => + new Partition { + override def index: Int = i + }) + } +} + +object CometExecRDD { + def apply(sc: SparkContext, partitionNum: Int)( + f: (Seq[Iterator[ColumnarBatch]], Int, Int) => Iterator[ColumnarBatch]) + : RDD[ColumnarBatch] = + withScope(sc) { + new CometExecRDD(sc, partitionNum, f) + } + + private[spark] def withScope[U](sc: SparkContext)(body: => U): U = + RDDOperationScope.withScope[U](sc)(body) +} diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala new file mode 100644 index 0000000000..ed9e545dd1 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometNativeScanExec.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.comet + +import scala.reflect.ClassTag + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning} +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.collection._ + +import com.google.common.base.Objects + +import org.apache.comet.DataTypeSupport +import org.apache.comet.parquet.CometParquetFileFormat +import org.apache.comet.serde.OperatorOuterClass.Operator + +/** + * Comet fully native scan node for DataSource V1. + */ +case class CometNativeScanExec( + override val nativeOp: Operator, + @transient relation: HadoopFsRelation, + override val output: Seq[Attribute], + requiredSchema: StructType, + partitionFilters: Seq[Expression], + optionalBucketSet: Option[BitSet], + optionalNumCoalescedBuckets: Option[Int], + dataFilters: Seq[Expression], + tableIdentifier: Option[TableIdentifier], + disableBucketedScan: Boolean = false, + originalPlan: FileSourceScanExec, + override val serializedPlanOpt: SerializedPlan) + extends CometLeafExec { + + override def nodeName: String = + s"${super.nodeName}: ${tableIdentifier.map(_.toString).getOrElse("")}" + + override def outputPartitioning: Partitioning = + UnknownPartitioning(originalPlan.inputRDD.getNumPartitions) + override def outputOrdering: Seq[SortOrder] = originalPlan.outputOrdering + + override def stringArgs: Iterator[Any] = Iterator(output) + + override def equals(obj: Any): Boolean = { + obj match { + case other: CometNativeScanExec => + this.output == other.output && + this.serializedPlanOpt == other.serializedPlanOpt + case _ => + false + } + } + + override def hashCode(): Int = Objects.hashCode(output) +} + +object CometNativeScanExec extends DataTypeSupport { + def apply( + nativeOp: Operator, + scanExec: FileSourceScanExec, + session: SparkSession): CometNativeScanExec = { + // TreeNode.mapProductIterator is protected method. + def mapProductIterator[B: ClassTag](product: Product, f: Any => B): Array[B] = { + val arr = Array.ofDim[B](product.productArity) + var i = 0 + while (i < arr.length) { + arr(i) = f(product.productElement(i)) + i += 1 + } + arr + } + + // Replacing the relation in FileSourceScanExec by `copy` seems causing some issues + // on other Spark distributions if FileSourceScanExec constructor is changed. + // Using `makeCopy` to avoid the issue. + // https://github.com/apache/arrow-datafusion-comet/issues/190 + def transform(arg: Any): AnyRef = arg match { + case _: HadoopFsRelation => + scanExec.relation.copy(fileFormat = new CometParquetFileFormat)(session) + case other: AnyRef => other + case null => null + } + val newArgs = mapProductIterator(scanExec, transform(_)) + val wrapped = scanExec.makeCopy(newArgs).asInstanceOf[FileSourceScanExec] + val batchScanExec = CometNativeScanExec( + nativeOp, + wrapped.relation, + wrapped.output, + wrapped.requiredSchema, + wrapped.partitionFilters, + wrapped.optionalBucketSet, + wrapped.optionalNumCoalescedBuckets, + wrapped.dataFilters, + wrapped.tableIdentifier, + wrapped.disableBucketedScan, + wrapped, + SerializedPlan(None)) + scanExec.logicalLink.foreach(batchScanExec.setLogicalLink) + batchScanExec + } + + override def isAdditionallySupported(dt: DataType): Boolean = { + // TODO add array and map + dt match { + case s: StructType => s.fields.map(_.dataType).forall(isTypeSupported) + case _ => false + } + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala index eb9d5cb314..497857d9d7 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometScanExec.scala @@ -131,8 +131,15 @@ case class CometScanExec( // exposed for testing lazy val bucketedScan: Boolean = wrapped.bucketedScan - override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) = - (wrapped.outputPartitioning, wrapped.outputOrdering) + override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) = { + if (bucketedScan) { + (wrapped.outputPartitioning, wrapped.outputOrdering) + } else { + val files = selectedPartitions.flatMap(partition => partition.files) + val numPartitions = files.length + (UnknownPartitioning(numPartitions), wrapped.outputOrdering) + } + } @transient private lazy val pushedDownFilters = getPushedDownFilters(relation, dataFilters) @@ -465,6 +472,19 @@ case class CometScanExec( } object CometScanExec extends DataTypeSupport { + + override def isAdditionallySupported(dt: DataType): Boolean = { + if (CometConf.COMET_NATIVE_SCAN_IMPL.get() == CometConf.SCAN_NATIVE_ICEBERG_COMPAT) { + // TODO add array and map + dt match { + case s: StructType => s.fields.map(_.dataType).forall(isTypeSupported) + case _ => false + } + } else { + false + } + } + def apply(scanExec: FileSourceScanExec, session: SparkSession): CometScanExec = { // TreeNode.mapProductIterator is protected method. def mapProductIterator[B: ClassTag](product: Product, f: Any => B): Array[B] = { diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala index c70f7464ec..f36b41aa62 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala @@ -242,17 +242,30 @@ abstract class CometNativeExec extends CometExec { case _ => true } + val containsBroadcastInput = sparkPlans.exists { + case _: CometBroadcastExchangeExec => true + case BroadcastQueryStageExec(_, _: CometBroadcastExchangeExec, _) => true + case BroadcastQueryStageExec(_, _: ReusedExchangeExec, _) => true + case _ => false + } + // If the first non broadcast plan is not found, it means all the plans are broadcast plans. // This is not expected, so throw an exception. - if (firstNonBroadcastPlan.isEmpty) { + if (containsBroadcastInput && firstNonBroadcastPlan.isEmpty) { throw new CometRuntimeException(s"Cannot find the first non broadcast plan: $this") } // If the first non broadcast plan is found, we need to adjust the partition number of // the broadcast plans to make sure they have the same partition number as the first non // broadcast plan. - val firstNonBroadcastPlanRDD = firstNonBroadcastPlan.get._1.executeColumnar() - val firstNonBroadcastPlanNumPartitions = firstNonBroadcastPlanRDD.getNumPartitions + val (firstNonBroadcastPlanRDD, firstNonBroadcastPlanNumPartitions) = + firstNonBroadcastPlan.get._1 match { + case plan: CometNativeExec => + (null, plan.outputPartitioning.numPartitions) + case plan => + val rdd = plan.executeColumnar() + (rdd, rdd.getNumPartitions) + } // Spark doesn't need to zip Broadcast RDDs, so it doesn't schedule Broadcast RDDs with // same partition number. But for Comet, we need to zip them so we need to adjust the @@ -270,6 +283,8 @@ abstract class CometNativeExec extends CometExec { ReusedExchangeExec(_, c: CometBroadcastExchangeExec), _) => inputs += c.setNumPartitions(firstNonBroadcastPlanNumPartitions).executeColumnar() + case _: CometNativeExec => + // no-op case _ if idx == firstNonBroadcastPlan.get._2 => inputs += firstNonBroadcastPlanRDD case _ => @@ -284,11 +299,16 @@ abstract class CometNativeExec extends CometExec { } } - if (inputs.isEmpty) { + if (inputs.isEmpty && !sparkPlans.forall(_.isInstanceOf[CometNativeExec])) { throw new CometRuntimeException(s"No input for CometNativeExec:\n $this") } - ZippedPartitionsRDD(sparkContext, inputs.toSeq)(createCometExecIter) + if (inputs.nonEmpty) { + ZippedPartitionsRDD(sparkContext, inputs.toSeq)(createCometExecIter) + } else { + val partitionNum = firstNonBroadcastPlanNumPartitions + CometExecRDD(sparkContext, partitionNum)(createCometExecIter) + } } } @@ -312,10 +332,10 @@ abstract class CometNativeExec extends CometExec { */ def foreachUntilCometInput(plan: SparkPlan)(func: SparkPlan => Unit): Unit = { plan match { - case _: CometScanExec | _: CometBatchScanExec | _: ShuffleQueryStageExec | - _: AQEShuffleReadExec | _: CometShuffleExchangeExec | _: CometUnionExec | - _: CometTakeOrderedAndProjectExec | _: CometCoalesceExec | _: ReusedExchangeExec | - _: CometBroadcastExchangeExec | _: BroadcastQueryStageExec | + case _: CometNativeScanExec | _: CometScanExec | _: CometBatchScanExec | + _: ShuffleQueryStageExec | _: AQEShuffleReadExec | _: CometShuffleExchangeExec | + _: CometUnionExec | _: CometTakeOrderedAndProjectExec | _: CometCoalesceExec | + _: ReusedExchangeExec | _: CometBroadcastExchangeExec | _: BroadcastQueryStageExec | _: CometSparkToColumnarExec => func(plan) case _: CometPlan => @@ -395,6 +415,8 @@ abstract class CometNativeExec extends CometExec { } } +abstract class CometLeafExec extends CometNativeExec with LeafExecNode + abstract class CometUnaryExec extends CometNativeExec with UnaryExecNode abstract class CometBinaryExec extends CometNativeExec with BinaryExecNode @@ -622,6 +644,7 @@ case class CometUnionExec( override val output: Seq[Attribute], children: Seq[SparkPlan]) extends CometExec { + override def doExecuteColumnar(): RDD[ColumnarBatch] = { sparkContext.union(children.map(_.executeColumnar())) } diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/explain.txt new file mode 100644 index 0000000000..a1697ca9eb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/explain.txt @@ -0,0 +1,212 @@ +== Physical Plan == +* ColumnarToRow (40) ++- CometTakeOrderedAndProject (39) + +- CometProject (38) + +- CometBroadcastHashJoin (37) + :- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometFilter (12) + : : : +- CometHashAggregate (11) + : : : +- CometExchange (10) + : : : +- CometHashAggregate (9) + : : : +- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (25) + : : +- CometFilter (24) + : : +- CometHashAggregate (23) + : : +- CometExchange (22) + : : +- CometHashAggregate (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (13) + : : +- ReusedExchange (15) + : +- CometBroadcastExchange (31) + : +- CometProject (30) + : +- CometFilter (29) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (28) + +- CometBroadcastExchange (36) + +- CometFilter (35) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (34) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Arguments: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] + +(2) CometFilter +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : (isnotnull(sr_store_sk#2) AND isnotnull(sr_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5, d_year#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [sr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] +Arguments: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3], [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] + +(9) CometHashAggregate +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] + +(10) CometExchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#7] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(11) CometHashAggregate +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#7] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] + +(12) CometFilter +Input [3]: [ctr_customer_sk#8, ctr_store_sk#9, ctr_total_return#10] +Condition : isnotnull(ctr_total_return#10) + +(13) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] +Arguments: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] + +(14) CometFilter +Input [4]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#12) + +(15) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#15] + +(16) CometBroadcastHashJoin +Left output [4]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] +Right output [1]: [d_date_sk#15] +Arguments: [sr_returned_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(17) CometProject +Input [5]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14, d_date_sk#15] +Arguments: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13], [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13] + +(18) CometHashAggregate +Input [3]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13] +Keys [2]: [sr_customer_sk#11, sr_store_sk#12] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#13))] + +(19) CometExchange +Input [3]: [sr_customer_sk#11, sr_store_sk#12, sum#16] +Arguments: hashpartitioning(sr_customer_sk#11, sr_store_sk#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometHashAggregate +Input [3]: [sr_customer_sk#11, sr_store_sk#12, sum#16] +Keys [2]: [sr_customer_sk#11, sr_store_sk#12] +Functions [1]: [sum(UnscaledValue(sr_return_amt#13))] + +(21) CometHashAggregate +Input [2]: [ctr_store_sk#17, ctr_total_return#18] +Keys [1]: [ctr_store_sk#17] +Functions [1]: [partial_avg(ctr_total_return#18)] + +(22) CometExchange +Input [3]: [ctr_store_sk#17, sum#19, count#20] +Arguments: hashpartitioning(ctr_store_sk#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(23) CometHashAggregate +Input [3]: [ctr_store_sk#17, sum#19, count#20] +Keys [1]: [ctr_store_sk#17] +Functions [1]: [avg(ctr_total_return#18)] + +(24) CometFilter +Input [2]: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#21) + +(25) CometBroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Arguments: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] + +(26) CometBroadcastHashJoin +Left output [3]: [ctr_customer_sk#8, ctr_store_sk#9, ctr_total_return#10] +Right output [2]: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Arguments: [ctr_store_sk#9], [ctr_store_sk#17], Inner, (cast(ctr_total_return#10 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#21), BuildRight + +(27) CometProject +Input [5]: [ctr_customer_sk#8, ctr_store_sk#9, ctr_total_return#10, (avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Arguments: [ctr_customer_sk#8, ctr_store_sk#9], [ctr_customer_sk#8, ctr_store_sk#9] + +(28) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#22, s_state#23] +Arguments: [s_store_sk#22, s_state#23] + +(29) CometFilter +Input [2]: [s_store_sk#22, s_state#23] +Condition : ((isnotnull(s_state#23) AND (s_state#23 = TN)) AND isnotnull(s_store_sk#22)) + +(30) CometProject +Input [2]: [s_store_sk#22, s_state#23] +Arguments: [s_store_sk#22], [s_store_sk#22] + +(31) CometBroadcastExchange +Input [1]: [s_store_sk#22] +Arguments: [s_store_sk#22] + +(32) CometBroadcastHashJoin +Left output [2]: [ctr_customer_sk#8, ctr_store_sk#9] +Right output [1]: [s_store_sk#22] +Arguments: [ctr_store_sk#9], [s_store_sk#22], Inner, BuildRight + +(33) CometProject +Input [3]: [ctr_customer_sk#8, ctr_store_sk#9, s_store_sk#22] +Arguments: [ctr_customer_sk#8], [ctr_customer_sk#8] + +(34) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: [c_customer_sk#24, c_customer_id#25] + +(35) CometFilter +Input [2]: [c_customer_sk#24, c_customer_id#25] +Condition : isnotnull(c_customer_sk#24) + +(36) CometBroadcastExchange +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: [c_customer_sk#24, c_customer_id#25] + +(37) CometBroadcastHashJoin +Left output [1]: [ctr_customer_sk#8] +Right output [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: [ctr_customer_sk#8], [c_customer_sk#24], Inner, BuildRight + +(38) CometProject +Input [3]: [ctr_customer_sk#8, c_customer_sk#24, c_customer_id#25] +Arguments: [c_customer_id#25], [c_customer_id#25] + +(39) CometTakeOrderedAndProject +Input [1]: [c_customer_id#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#25 ASC NULLS FIRST], output=[c_customer_id#25]), [c_customer_id#25], 100, [c_customer_id#25 ASC NULLS FIRST], [c_customer_id#25] + +(40) ColumnarToRow [codegen id : 1] +Input [1]: [c_customer_id#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/simplified.txt new file mode 100644 index 0000000000..87657df444 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_customer_id] + CometProject [c_customer_id] + CometBroadcastHashJoin [ctr_customer_sk,c_customer_sk,c_customer_id] + CometProject [ctr_customer_sk] + CometBroadcastHashJoin [ctr_customer_sk,ctr_store_sk,s_store_sk] + CometProject [ctr_customer_sk,ctr_store_sk] + CometBroadcastHashJoin [ctr_customer_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2),ctr_store_sk] + CometFilter [ctr_customer_sk,ctr_store_sk,ctr_total_return] + CometHashAggregate [ctr_customer_sk,ctr_store_sk,ctr_total_return,sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + CometExchange [sr_customer_sk,sr_store_sk] #1 + CometHashAggregate [sr_customer_sk,sr_store_sk,sum,sr_return_amt] + CometProject [sr_customer_sk,sr_store_sk,sr_return_amt] + CometBroadcastHashJoin [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk,d_date_sk] + CometFilter [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_store_sk] #3 + CometFilter [(avg(ctr_total_return) * 1.2),ctr_store_sk] + CometHashAggregate [(avg(ctr_total_return) * 1.2),ctr_store_sk,sum,count,avg(ctr_total_return)] + CometExchange [ctr_store_sk] #4 + CometHashAggregate [ctr_store_sk,sum,count,ctr_total_return] + CometHashAggregate [ctr_store_sk,ctr_total_return,sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + CometExchange [sr_customer_sk,sr_store_sk] #5 + CometHashAggregate [sr_customer_sk,sr_store_sk,sum,sr_return_amt] + CometProject [sr_customer_sk,sr_store_sk,sr_return_amt] + CometBroadcastHashJoin [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk,d_date_sk] + CometFilter [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + CometBroadcastExchange [s_store_sk] #6 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometBroadcastExchange [c_customer_sk,c_customer_id] #7 + CometFilter [c_customer_sk,c_customer_id] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ca868a928e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/explain.txt @@ -0,0 +1,229 @@ +== Physical Plan == +* ColumnarToRow (40) ++- CometTakeOrderedAndProject (39) + +- CometProject (38) + +- CometBroadcastHashJoin (37) + :- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometFilter (12) + : : : +- CometHashAggregate (11) + : : : +- CometExchange (10) + : : : +- CometHashAggregate (9) + : : : +- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_returns (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (25) + : : +- CometFilter (24) + : : +- CometHashAggregate (23) + : : +- CometExchange (22) + : : +- CometHashAggregate (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.store_returns (13) + : : +- ReusedExchange (15) + : +- CometBroadcastExchange (31) + : +- CometProject (30) + : +- CometFilter (29) + : +- CometScan parquet spark_catalog.default.store (28) + +- CometBroadcastExchange (36) + +- CometFilter (35) + +- CometScan parquet spark_catalog.default.customer (34) + + +(1) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#4)] +PushedFilters: [IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : (isnotnull(sr_store_sk#2) AND isnotnull(sr_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [sr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] +Arguments: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3], [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] + +(9) CometHashAggregate +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] + +(10) CometExchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#7] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(11) CometHashAggregate +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#7] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] + +(12) CometFilter +Input [3]: [ctr_customer_sk#8, ctr_store_sk#9, ctr_total_return#10] +Condition : isnotnull(ctr_total_return#10) + +(13) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(14) CometFilter +Input [4]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#12) + +(15) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#15] + +(16) CometBroadcastHashJoin +Left output [4]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14] +Right output [1]: [d_date_sk#15] +Arguments: [sr_returned_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(17) CometProject +Input [5]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13, sr_returned_date_sk#14, d_date_sk#15] +Arguments: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13], [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13] + +(18) CometHashAggregate +Input [3]: [sr_customer_sk#11, sr_store_sk#12, sr_return_amt#13] +Keys [2]: [sr_customer_sk#11, sr_store_sk#12] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#13))] + +(19) CometExchange +Input [3]: [sr_customer_sk#11, sr_store_sk#12, sum#16] +Arguments: hashpartitioning(sr_customer_sk#11, sr_store_sk#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometHashAggregate +Input [3]: [sr_customer_sk#11, sr_store_sk#12, sum#16] +Keys [2]: [sr_customer_sk#11, sr_store_sk#12] +Functions [1]: [sum(UnscaledValue(sr_return_amt#13))] + +(21) CometHashAggregate +Input [2]: [ctr_store_sk#17, ctr_total_return#18] +Keys [1]: [ctr_store_sk#17] +Functions [1]: [partial_avg(ctr_total_return#18)] + +(22) CometExchange +Input [3]: [ctr_store_sk#17, sum#19, count#20] +Arguments: hashpartitioning(ctr_store_sk#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(23) CometHashAggregate +Input [3]: [ctr_store_sk#17, sum#19, count#20] +Keys [1]: [ctr_store_sk#17] +Functions [1]: [avg(ctr_total_return#18)] + +(24) CometFilter +Input [2]: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#21) + +(25) CometBroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Arguments: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] + +(26) CometBroadcastHashJoin +Left output [3]: [ctr_customer_sk#8, ctr_store_sk#9, ctr_total_return#10] +Right output [2]: [(avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Arguments: [ctr_store_sk#9], [ctr_store_sk#17], Inner, (cast(ctr_total_return#10 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#21), BuildRight + +(27) CometProject +Input [5]: [ctr_customer_sk#8, ctr_store_sk#9, ctr_total_return#10, (avg(ctr_total_return) * 1.2)#21, ctr_store_sk#17] +Arguments: [ctr_customer_sk#8, ctr_store_sk#9], [ctr_customer_sk#8, ctr_store_sk#9] + +(28) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#22, s_state#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(29) CometFilter +Input [2]: [s_store_sk#22, s_state#23] +Condition : ((isnotnull(s_state#23) AND (s_state#23 = TN)) AND isnotnull(s_store_sk#22)) + +(30) CometProject +Input [2]: [s_store_sk#22, s_state#23] +Arguments: [s_store_sk#22], [s_store_sk#22] + +(31) CometBroadcastExchange +Input [1]: [s_store_sk#22] +Arguments: [s_store_sk#22] + +(32) CometBroadcastHashJoin +Left output [2]: [ctr_customer_sk#8, ctr_store_sk#9] +Right output [1]: [s_store_sk#22] +Arguments: [ctr_store_sk#9], [s_store_sk#22], Inner, BuildRight + +(33) CometProject +Input [3]: [ctr_customer_sk#8, ctr_store_sk#9, s_store_sk#22] +Arguments: [ctr_customer_sk#8], [ctr_customer_sk#8] + +(34) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#24, c_customer_id#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(35) CometFilter +Input [2]: [c_customer_sk#24, c_customer_id#25] +Condition : isnotnull(c_customer_sk#24) + +(36) CometBroadcastExchange +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: [c_customer_sk#24, c_customer_id#25] + +(37) CometBroadcastHashJoin +Left output [1]: [ctr_customer_sk#8] +Right output [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: [ctr_customer_sk#8], [c_customer_sk#24], Inner, BuildRight + +(38) CometProject +Input [3]: [ctr_customer_sk#8, c_customer_sk#24, c_customer_id#25] +Arguments: [c_customer_id#25], [c_customer_id#25] + +(39) CometTakeOrderedAndProject +Input [1]: [c_customer_id#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#25 ASC NULLS FIRST], output=[c_customer_id#25]), [c_customer_id#25], 100, [c_customer_id#25 ASC NULLS FIRST], [c_customer_id#25] + +(40) ColumnarToRow [codegen id : 1] +Input [1]: [c_customer_id#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..dd2d55266f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q1.native_iceberg_compat/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_customer_id] + CometProject [c_customer_id] + CometBroadcastHashJoin [ctr_customer_sk,c_customer_sk,c_customer_id] + CometProject [ctr_customer_sk] + CometBroadcastHashJoin [ctr_customer_sk,ctr_store_sk,s_store_sk] + CometProject [ctr_customer_sk,ctr_store_sk] + CometBroadcastHashJoin [ctr_customer_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2),ctr_store_sk] + CometFilter [ctr_customer_sk,ctr_store_sk,ctr_total_return] + CometHashAggregate [ctr_customer_sk,ctr_store_sk,ctr_total_return,sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + CometExchange [sr_customer_sk,sr_store_sk] #1 + CometHashAggregate [sr_customer_sk,sr_store_sk,sum,sr_return_amt] + CometProject [sr_customer_sk,sr_store_sk,sr_return_amt] + CometBroadcastHashJoin [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk,d_date_sk] + CometFilter [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_store_sk] #3 + CometFilter [(avg(ctr_total_return) * 1.2),ctr_store_sk] + CometHashAggregate [(avg(ctr_total_return) * 1.2),ctr_store_sk,sum,count,avg(ctr_total_return)] + CometExchange [ctr_store_sk] #4 + CometHashAggregate [ctr_store_sk,sum,count,ctr_total_return] + CometHashAggregate [ctr_store_sk,ctr_total_return,sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + CometExchange [sr_customer_sk,sr_store_sk] #5 + CometHashAggregate [sr_customer_sk,sr_store_sk,sum,sr_return_amt] + CometProject [sr_customer_sk,sr_store_sk,sr_return_amt] + CometBroadcastHashJoin [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk,d_date_sk] + CometFilter [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + CometBroadcastExchange [s_store_sk] #6 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + CometBroadcastExchange [c_customer_sk,c_customer_id] #7 + CometFilter [c_customer_sk,c_customer_id] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/explain.txt new file mode 100644 index 0000000000..7c92771145 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/explain.txt @@ -0,0 +1,214 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * BroadcastHashJoin Inner BuildRight (35) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * Project (23) + : : +- * Filter (22) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (21) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (28) + : +- * ColumnarToRow (27) + : +- CometProject (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- BroadcastExchange (34) + +- * ColumnarToRow (33) + +- CometFilter (32) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (31) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#6, ss_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Arguments: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(22) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(23) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#15, ca_county#16] +Arguments: [ca_address_sk#15, ca_county#16] + +(25) CometFilter +Input [2]: [ca_address_sk#15, ca_county#16] +Condition : (ca_county#16 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#15)) + +(26) CometProject +Input [2]: [ca_address_sk#15, ca_county#16] +Arguments: [ca_address_sk#15], [ca_address_sk#15] + +(27) ColumnarToRow [codegen id : 3] +Input [1]: [ca_address_sk#15] + +(28) BroadcastExchange +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#15] + +(31) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Arguments: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] + +(32) CometFilter +Input [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Condition : isnotnull(cd_demo_sk#17) + +(33) ColumnarToRow [codegen id : 4] +Input [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] + +(34) BroadcastExchange +Input [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 5] +Output [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] + +(37) HashAggregate [codegen id : 5] +Input [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Keys [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#26] +Results [9]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, count#27] + +(38) Exchange +Input [9]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, count#27] +Arguments: hashpartitioning(cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(39) HashAggregate [codegen id : 6] +Input [9]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, count#27] +Keys [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#28] +Results [14]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, count(1)#28 AS cnt1#29, cd_purchase_estimate#21, count(1)#28 AS cnt2#30, cd_credit_rating#22, count(1)#28 AS cnt3#31, cd_dep_count#23, count(1)#28 AS cnt4#32, cd_dep_employed_count#24, count(1)#28 AS cnt5#33, cd_dep_college_count#25, count(1)#28 AS cnt6#34] + +(40) TakeOrderedAndProject +Input [14]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#29, cd_purchase_estimate#21, cnt2#30, cd_credit_rating#22, cnt3#31, cd_dep_count#23, cnt4#32, cd_dep_employed_count#24, cnt5#33, cd_dep_college_count#25, cnt6#34] +Arguments: 100, [cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_education_status#20 ASC NULLS FIRST, cd_purchase_estimate#21 ASC NULLS FIRST, cd_credit_rating#22 ASC NULLS FIRST, cd_dep_count#23 ASC NULLS FIRST, cd_dep_employed_count#24 ASC NULLS FIRST, cd_dep_college_count#25 ASC NULLS FIRST], [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#29, cd_purchase_estimate#21, cnt2#30, cd_credit_rating#22, cnt3#31, cd_dep_count#23, cnt4#32, cd_dep_employed_count#24, cnt5#33, cd_dep_college_count#25, cnt6#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/simplified.txt new file mode 100644 index 0000000000..228afd2bac --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_datafusion/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (6) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_county] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..07599dfe5e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (28) + : : +- * Filter (27) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (26) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometScan parquet spark_catalog.default.store_sales (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometScan parquet spark_catalog.default.web_sales (13) + : : : +- ReusedExchange (14) + : : +- BroadcastExchange (25) + : : +- * ColumnarToRow (24) + : : +- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometScan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (21) + : +- BroadcastExchange (33) + : +- * ColumnarToRow (32) + : +- CometProject (31) + : +- CometFilter (30) + : +- CometScan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (39) + +- * ColumnarToRow (38) + +- CometFilter (37) + +- CometScan parquet spark_catalog.default.customer_demographics (36) + + +(1) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(21) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#16] + +(22) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + +(23) CometProject +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [cs_ship_customer_sk#14], [cs_ship_customer_sk#14] + +(24) ColumnarToRow [codegen id : 2] +Input [1]: [cs_ship_customer_sk#14] + +(25) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(27) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(28) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(29) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_county#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Dona Ana County,Jefferson County,La Porte County,Rush County,Toole County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#17)) + +(31) CometProject +Input [2]: [ca_address_sk#17, ca_county#18] +Arguments: [ca_address_sk#17], [ca_address_sk#17] + +(32) ColumnarToRow [codegen id : 3] +Input [1]: [ca_address_sk#17] + +(33) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 5] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17] + +(36) CometScan parquet spark_catalog.default.customer_demographics +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(37) CometFilter +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(38) ColumnarToRow [codegen id : 4] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(39) BroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(40) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 5] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(42) HashAggregate [codegen id : 5] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] + +(43) Exchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(44) HashAggregate [codegen id : 6] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] + +(45) TakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f388a0a786 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q10.native_iceberg_compat/simplified.txt @@ -0,0 +1,61 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (6) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometProject [cs_ship_customer_sk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_county] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/explain.txt new file mode 100644 index 0000000000..d4ba47247d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/explain.txt @@ -0,0 +1,331 @@ +== Physical Plan == +* ColumnarToRow (63) ++- CometTakeOrderedAndProject (62) + +- CometProject (61) + +- CometBroadcastHashJoin (60) + :- CometProject (47) + : +- CometBroadcastHashJoin (46) + : :- CometProject (32) + : : +- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (22) + : +- CometBroadcastExchange (45) + : +- CometFilter (44) + : +- CometHashAggregate (43) + : +- CometExchange (42) + : +- CometHashAggregate (41) + : +- CometProject (40) + : +- CometBroadcastHashJoin (39) + : :- CometProject (37) + : : +- CometBroadcastHashJoin (36) + : : :- CometFilter (34) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (33) + : : +- ReusedExchange (35) + : +- ReusedExchange (38) + +- CometBroadcastExchange (59) + +- CometHashAggregate (58) + +- CometExchange (57) + +- CometHashAggregate (56) + +- CometProject (55) + +- CometBroadcastHashJoin (54) + :- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometFilter (49) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (48) + : +- ReusedExchange (50) + +- ReusedExchange (53) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(4) CometFilter +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(9) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(11) CometBroadcastHashJoin +Left output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#13, d_year#14] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] + +(13) CometHashAggregate +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(14) CometExchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(16) CometFilter +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(17) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Arguments: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + +(18) CometFilter +Input [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_customer_id#19)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(20) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Right output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_sk#18], [ss_customer_sk#26], Inner, BuildRight + +(21) CometProject +Input [12]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(22) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#30, d_year#31] +Arguments: [d_date_sk#30, d_year#31] + +(23) CometFilter +Input [2]: [d_date_sk#30, d_year#31] +Condition : ((isnotnull(d_year#31) AND (d_year#31 = 2002)) AND isnotnull(d_date_sk#30)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#30, d_year#31] +Arguments: [d_date_sk#30, d_year#31] + +(25) CometBroadcastHashJoin +Left output [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Right output [2]: [d_date_sk#30, d_year#31] +Arguments: [ss_sold_date_sk#29], [d_date_sk#30], Inner, BuildRight + +(26) CometProject +Input [12]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29, d_date_sk#30, d_year#31] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] + +(27) CometHashAggregate +Input [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(28) CometExchange +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Arguments: hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(30) CometBroadcastExchange +Input [3]: [customer_id#33, customer_preferred_cust_flag#34, year_total#35] +Arguments: [customer_id#33, customer_preferred_cust_flag#34, year_total#35] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#16, year_total#17] +Right output [3]: [customer_id#33, customer_preferred_cust_flag#34, year_total#35] +Arguments: [customer_id#16], [customer_id#33], Inner, BuildRight + +(32) CometProject +Input [5]: [customer_id#16, year_total#17, customer_id#33, customer_preferred_cust_flag#34, year_total#35] +Arguments: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35], [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35] + +(33) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] +Arguments: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] + +(34) CometFilter +Input [8]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] +Condition : (isnotnull(c_customer_sk#36) AND isnotnull(c_customer_id#37)) + +(35) ReusedExchange [Reuses operator id: 5] +Output [4]: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] + +(36) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] +Right output [4]: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Arguments: [c_customer_sk#36], [ws_bill_customer_sk#44], Inner, BuildRight + +(37) CometProject +Input [12]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Arguments: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47], [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] + +(38) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#48, d_year#49] + +(39) CometBroadcastHashJoin +Left output [10]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Right output [2]: [d_date_sk#48, d_year#49] +Arguments: [ws_sold_date_sk#47], [d_date_sk#48], Inner, BuildRight + +(40) CometProject +Input [12]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47, d_date_sk#48, d_year#49] +Arguments: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, d_year#49], [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, d_year#49] + +(41) CometHashAggregate +Input [10]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, d_year#49] +Keys [8]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#46 - ws_ext_discount_amt#45)))] + +(42) CometExchange +Input [9]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49, sum#50] +Arguments: hashpartitioning(c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(43) CometHashAggregate +Input [9]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49, sum#50] +Keys [8]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#46 - ws_ext_discount_amt#45)))] + +(44) CometFilter +Input [2]: [customer_id#51, year_total#52] +Condition : (isnotnull(year_total#52) AND (year_total#52 > 0.00)) + +(45) CometBroadcastExchange +Input [2]: [customer_id#51, year_total#52] +Arguments: [customer_id#51, year_total#52] + +(46) CometBroadcastHashJoin +Left output [4]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35] +Right output [2]: [customer_id#51, year_total#52] +Arguments: [customer_id#16], [customer_id#51], Inner, BuildRight + +(47) CometProject +Input [6]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, customer_id#51, year_total#52] +Arguments: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52], [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52] + +(48) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] +Arguments: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] + +(49) CometFilter +Input [8]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] +Condition : (isnotnull(c_customer_sk#53) AND isnotnull(c_customer_id#54)) + +(50) ReusedExchange [Reuses operator id: 5] +Output [4]: [ws_bill_customer_sk#61, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] + +(51) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] +Right output [4]: [ws_bill_customer_sk#61, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] +Arguments: [c_customer_sk#53], [ws_bill_customer_sk#61], Inner, BuildRight + +(52) CometProject +Input [12]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_bill_customer_sk#61, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] +Arguments: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64], [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] + +(53) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#65, d_year#66] + +(54) CometBroadcastHashJoin +Left output [10]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] +Right output [2]: [d_date_sk#65, d_year#66] +Arguments: [ws_sold_date_sk#64], [d_date_sk#65], Inner, BuildRight + +(55) CometProject +Input [12]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64, d_date_sk#65, d_year#66] +Arguments: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, d_year#66], [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, d_year#66] + +(56) CometHashAggregate +Input [10]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, d_year#66] +Keys [8]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#63 - ws_ext_discount_amt#62)))] + +(57) CometExchange +Input [9]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66, sum#67] +Arguments: hashpartitioning(c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(58) CometHashAggregate +Input [9]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66, sum#67] +Keys [8]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#63 - ws_ext_discount_amt#62)))] + +(59) CometBroadcastExchange +Input [2]: [customer_id#68, year_total#69] +Arguments: [customer_id#68, year_total#69] + +(60) CometBroadcastHashJoin +Left output [5]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52] +Right output [2]: [customer_id#68, year_total#69] +Arguments: [customer_id#16], [customer_id#68], Inner, (CASE WHEN (year_total#52 > 0.00) THEN (year_total#69 / year_total#52) END > CASE WHEN (year_total#17 > 0.00) THEN (year_total#35 / year_total#17) END), BuildRight + +(61) CometProject +Input [7]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52, customer_id#68, year_total#69] +Arguments: [customer_preferred_cust_flag#34], [customer_preferred_cust_flag#34] + +(62) CometTakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#34 ASC NULLS FIRST], output=[customer_preferred_cust_flag#34]), [customer_preferred_cust_flag#34], 100, [customer_preferred_cust_flag#34 ASC NULLS FIRST], [customer_preferred_cust_flag#34] + +(63) ColumnarToRow [codegen id : 1] +Input [1]: [customer_preferred_cust_flag#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7bfc93c3e9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_datafusion/simplified.txt @@ -0,0 +1,65 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_preferred_cust_flag] + CometProject [customer_preferred_cust_flag] + CometBroadcastHashJoin [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_preferred_cust_flag,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_preferred_cust_flag,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_preferred_cust_flag,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_preferred_cust_flag,year_total] #4 + CometHashAggregate [customer_id,customer_preferred_cust_flag,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #2 + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #9 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #10 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #2 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7e598f18a0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/explain.txt @@ -0,0 +1,368 @@ +== Physical Plan == +* ColumnarToRow (65) ++- CometTakeOrderedAndProject (64) + +- CometProject (63) + +- CometBroadcastHashJoin (62) + :- CometProject (49) + : +- CometBroadcastHashJoin (48) + : :- CometProject (32) + : : +- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometScan parquet spark_catalog.default.customer (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometScan parquet spark_catalog.default.date_dim (22) + : +- CometBroadcastExchange (47) + : +- CometFilter (46) + : +- CometHashAggregate (45) + : +- CometExchange (44) + : +- CometHashAggregate (43) + : +- CometProject (42) + : +- CometBroadcastHashJoin (41) + : :- CometProject (39) + : : +- CometBroadcastHashJoin (38) + : : :- CometFilter (34) + : : : +- CometScan parquet spark_catalog.default.customer (33) + : : +- CometBroadcastExchange (37) + : : +- CometFilter (36) + : : +- CometScan parquet spark_catalog.default.web_sales (35) + : +- ReusedExchange (40) + +- CometBroadcastExchange (61) + +- CometHashAggregate (60) + +- CometExchange (59) + +- CometHashAggregate (58) + +- CometProject (57) + +- CometBroadcastHashJoin (56) + :- CometProject (54) + : +- CometBroadcastHashJoin (53) + : :- CometFilter (51) + : : +- CometScan parquet spark_catalog.default.customer (50) + : +- ReusedExchange (52) + +- ReusedExchange (55) + + +(1) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(11) CometBroadcastHashJoin +Left output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#13, d_year#14] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] + +(13) CometHashAggregate +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(14) CometExchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(16) CometFilter +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(17) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(18) CometFilter +Input [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_customer_id#19)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(20) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Right output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_sk#18], [ss_customer_sk#26], Inner, BuildRight + +(21) CometProject +Input [12]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(22) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#30, d_year#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) CometFilter +Input [2]: [d_date_sk#30, d_year#31] +Condition : ((isnotnull(d_year#31) AND (d_year#31 = 2002)) AND isnotnull(d_date_sk#30)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#30, d_year#31] +Arguments: [d_date_sk#30, d_year#31] + +(25) CometBroadcastHashJoin +Left output [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Right output [2]: [d_date_sk#30, d_year#31] +Arguments: [ss_sold_date_sk#29], [d_date_sk#30], Inner, BuildRight + +(26) CometProject +Input [12]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29, d_date_sk#30, d_year#31] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] + +(27) CometHashAggregate +Input [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(28) CometExchange +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Arguments: hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(30) CometBroadcastExchange +Input [3]: [customer_id#33, customer_preferred_cust_flag#34, year_total#35] +Arguments: [customer_id#33, customer_preferred_cust_flag#34, year_total#35] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#16, year_total#17] +Right output [3]: [customer_id#33, customer_preferred_cust_flag#34, year_total#35] +Arguments: [customer_id#16], [customer_id#33], Inner, BuildRight + +(32) CometProject +Input [5]: [customer_id#16, year_total#17, customer_id#33, customer_preferred_cust_flag#34, year_total#35] +Arguments: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35], [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35] + +(33) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(34) CometFilter +Input [8]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] +Condition : (isnotnull(c_customer_sk#36) AND isnotnull(c_customer_id#37)) + +(35) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#47)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(36) CometFilter +Input [4]: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Condition : isnotnull(ws_bill_customer_sk#44) + +(37) CometBroadcastExchange +Input [4]: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Arguments: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] + +(38) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43] +Right output [4]: [ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Arguments: [c_customer_sk#36], [ws_bill_customer_sk#44], Inner, BuildRight + +(39) CometProject +Input [12]: [c_customer_sk#36, c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_bill_customer_sk#44, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Arguments: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47], [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] + +(40) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#48, d_year#49] + +(41) CometBroadcastHashJoin +Left output [10]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47] +Right output [2]: [d_date_sk#48, d_year#49] +Arguments: [ws_sold_date_sk#47], [d_date_sk#48], Inner, BuildRight + +(42) CometProject +Input [12]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, ws_sold_date_sk#47, d_date_sk#48, d_year#49] +Arguments: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, d_year#49], [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, d_year#49] + +(43) CometHashAggregate +Input [10]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, ws_ext_discount_amt#45, ws_ext_list_price#46, d_year#49] +Keys [8]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#46 - ws_ext_discount_amt#45)))] + +(44) CometExchange +Input [9]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49, sum#50] +Arguments: hashpartitioning(c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(45) CometHashAggregate +Input [9]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49, sum#50] +Keys [8]: [c_customer_id#37, c_first_name#38, c_last_name#39, c_preferred_cust_flag#40, c_birth_country#41, c_login#42, c_email_address#43, d_year#49] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#46 - ws_ext_discount_amt#45)))] + +(46) CometFilter +Input [2]: [customer_id#51, year_total#52] +Condition : (isnotnull(year_total#52) AND (year_total#52 > 0.00)) + +(47) CometBroadcastExchange +Input [2]: [customer_id#51, year_total#52] +Arguments: [customer_id#51, year_total#52] + +(48) CometBroadcastHashJoin +Left output [4]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35] +Right output [2]: [customer_id#51, year_total#52] +Arguments: [customer_id#16], [customer_id#51], Inner, BuildRight + +(49) CometProject +Input [6]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, customer_id#51, year_total#52] +Arguments: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52], [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52] + +(50) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(51) CometFilter +Input [8]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] +Condition : (isnotnull(c_customer_sk#53) AND isnotnull(c_customer_id#54)) + +(52) ReusedExchange [Reuses operator id: 37] +Output [4]: [ws_bill_customer_sk#61, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] + +(53) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60] +Right output [4]: [ws_bill_customer_sk#61, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] +Arguments: [c_customer_sk#53], [ws_bill_customer_sk#61], Inner, BuildRight + +(54) CometProject +Input [12]: [c_customer_sk#53, c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_bill_customer_sk#61, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] +Arguments: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64], [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] + +(55) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#65, d_year#66] + +(56) CometBroadcastHashJoin +Left output [10]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64] +Right output [2]: [d_date_sk#65, d_year#66] +Arguments: [ws_sold_date_sk#64], [d_date_sk#65], Inner, BuildRight + +(57) CometProject +Input [12]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, ws_sold_date_sk#64, d_date_sk#65, d_year#66] +Arguments: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, d_year#66], [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, d_year#66] + +(58) CometHashAggregate +Input [10]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, ws_ext_discount_amt#62, ws_ext_list_price#63, d_year#66] +Keys [8]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#63 - ws_ext_discount_amt#62)))] + +(59) CometExchange +Input [9]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66, sum#67] +Arguments: hashpartitioning(c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(60) CometHashAggregate +Input [9]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66, sum#67] +Keys [8]: [c_customer_id#54, c_first_name#55, c_last_name#56, c_preferred_cust_flag#57, c_birth_country#58, c_login#59, c_email_address#60, d_year#66] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#63 - ws_ext_discount_amt#62)))] + +(61) CometBroadcastExchange +Input [2]: [customer_id#68, year_total#69] +Arguments: [customer_id#68, year_total#69] + +(62) CometBroadcastHashJoin +Left output [5]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52] +Right output [2]: [customer_id#68, year_total#69] +Arguments: [customer_id#16], [customer_id#68], Inner, (CASE WHEN (year_total#52 > 0.00) THEN (year_total#69 / year_total#52) END > CASE WHEN (year_total#17 > 0.00) THEN (year_total#35 / year_total#17) END), BuildRight + +(63) CometProject +Input [7]: [customer_id#16, year_total#17, customer_preferred_cust_flag#34, year_total#35, year_total#52, customer_id#68, year_total#69] +Arguments: [customer_preferred_cust_flag#34], [customer_preferred_cust_flag#34] + +(64) CometTakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#34 ASC NULLS FIRST], output=[customer_preferred_cust_flag#34]), [customer_preferred_cust_flag#34], 100, [customer_preferred_cust_flag#34 ASC NULLS FIRST], [customer_preferred_cust_flag#34] + +(65) ColumnarToRow [codegen id : 1] +Input [1]: [customer_preferred_cust_flag#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3a01bb4dcc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q11.native_iceberg_compat/simplified.txt @@ -0,0 +1,67 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_preferred_cust_flag] + CometProject [customer_preferred_cust_flag] + CometBroadcastHashJoin [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_preferred_cust_flag,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_preferred_cust_flag,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_preferred_cust_flag,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_preferred_cust_flag,year_total] #4 + CometHashAggregate [customer_id,customer_preferred_cust_flag,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + CometFilter [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #10 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/explain.txt new file mode 100644 index 0000000000..c6a8dff14a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/explain.txt @@ -0,0 +1,116 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ws_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/simplified.txt new file mode 100644 index 0000000000..64d31a68c7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id] + CometExchange [i_class] #1 + CometHashAggregate [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..5fec3af520 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.web_sales (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.item (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometScan parquet spark_catalog.default.date_dim (8) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ws_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7696b4f605 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q12.native_iceberg_compat/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id] + CometExchange [i_class] #1 + CometHashAggregate [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/explain.txt new file mode 100644 index 0000000000..3e48fad3a2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/explain.txt @@ -0,0 +1,174 @@ +== Physical Plan == +* ColumnarToRow (33) ++- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometProject (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + : : +- CometBroadcastExchange (17) + : : +- CometProject (16) + : : +- CometFilter (15) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (20) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(2) CometFilter +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_hdemo_sk#2)) AND ((((ss_net_profit#9 >= 100.00) AND (ss_net_profit#9 <= 200.00)) OR ((ss_net_profit#9 >= 150.00) AND (ss_net_profit#9 <= 300.00))) OR ((ss_net_profit#9 >= 50.00) AND (ss_net_profit#9 <= 250.00)))) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) + +(3) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(4) CometFilter +Input [1]: [s_store_sk#11] +Condition : isnotnull(s_store_sk#11) + +(5) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(6) CometBroadcastHashJoin +Left output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#4], [s_store_sk#11], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, s_store_sk#11] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13, ca_country#14] + +(9) CometFilter +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (TX,OH) OR ca_state#13 IN (OR,NM,KY)) OR ca_state#13 IN (VA,TX,MS))) + +(10) CometProject +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13], [ca_address_sk#12, ca_state#13] + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ca_address_sk#12, ca_state#13] + +(12) CometBroadcastHashJoin +Left output [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Right output [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ss_addr_sk#3], [ca_address_sk#12], Inner, ((((ca_state#13 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#13 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#13 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))), BuildRight + +(13) CometProject +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#12, ca_state#13] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(15) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(16) CometProject +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15], [d_date_sk#15] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [7]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] +Right output [1]: [d_date_sk#15] +Arguments: [ss_sold_date_sk#10], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#15] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(21) CometFilter +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : (isnotnull(cd_demo_sk#17) AND ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) OR ((cd_marital_status#18 = S) AND (cd_education_status#19 = College ))) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )))) + +(22) CometBroadcastExchange +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Right output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [ss_cdemo_sk#1], [cd_demo_sk#17], Inner, ((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))), BuildRight + +(24) CometProject +Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19], [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] + +(25) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [hd_demo_sk#20, hd_dep_count#21] + +(26) CometFilter +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Condition : (isnotnull(hd_demo_sk#20) AND ((hd_dep_count#21 = 3) OR (hd_dep_count#21 = 1))) + +(27) CometBroadcastExchange +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [hd_demo_sk#20, hd_dep_count#21] + +(28) CometBroadcastHashJoin +Left output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] +Right output [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#20], Inner, (((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#21 = 3)) OR (((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#21 = 1))) OR (((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#21 = 1))), BuildRight + +(29) CometProject +Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20, hd_dep_count#21] +Arguments: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8], [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(30) CometHashAggregate +Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Keys: [] +Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] + +(31) CometExchange +Input [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Keys: [] +Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] + +(33) ColumnarToRow [codegen id : 1] +Input [4]: [avg(ss_quantity)#29, avg(ss_ext_sales_price)#30, avg(ss_ext_wholesale_cost)#31, sum(ss_ext_wholesale_cost)#32] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ec41eb52c3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_datafusion/simplified.txt @@ -0,0 +1,35 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum,avg(ss_quantity),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost))] + CometExchange #1 + CometHashAggregate [sum,count,sum,count,sum,count,sum,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + CometProject [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + CometBroadcastHashJoin [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status,hd_demo_sk,hd_dep_count] + CometProject [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk,d_date_sk] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk,ca_address_sk,ca_state] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk,s_store_sk] + CometFilter [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #5 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [hd_demo_sk,hd_dep_count] #6 + CometFilter [hd_demo_sk,hd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8dfa77e2f6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +* ColumnarToRow (33) ++- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometProject (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.customer_address (8) + : : +- CometBroadcastExchange (17) + : : +- CometProject (16) + : : +- CometFilter (15) + : : +- CometScan parquet spark_catalog.default.date_dim (14) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometScan parquet spark_catalog.default.customer_demographics (20) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometScan parquet spark_catalog.default.household_demographics (25) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct + +(2) CometFilter +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_hdemo_sk#2)) AND ((((ss_net_profit#9 >= 100.00) AND (ss_net_profit#9 <= 200.00)) OR ((ss_net_profit#9 >= 150.00) AND (ss_net_profit#9 <= 300.00))) OR ((ss_net_profit#9 >= 50.00) AND (ss_net_profit#9 <= 250.00)))) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) + +(3) CometScan parquet spark_catalog.default.store +Output [1]: [s_store_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(4) CometFilter +Input [1]: [s_store_sk#11] +Condition : isnotnull(s_store_sk#11) + +(5) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(6) CometBroadcastHashJoin +Left output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#4], [s_store_sk#11], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, s_store_sk#11] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(8) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [OH,TX]),In(ca_state, [KY,NM,OR])),In(ca_state, [MS,TX,VA]))] +ReadSchema: struct + +(9) CometFilter +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (TX,OH) OR ca_state#13 IN (OR,NM,KY)) OR ca_state#13 IN (VA,TX,MS))) + +(10) CometProject +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13], [ca_address_sk#12, ca_state#13] + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ca_address_sk#12, ca_state#13] + +(12) CometBroadcastHashJoin +Left output [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Right output [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ss_addr_sk#3], [ca_address_sk#12], Inner, ((((ca_state#13 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#13 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#13 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))), BuildRight + +(13) CometProject +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#12, ca_state#13] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] + +(14) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(16) CometProject +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15], [d_date_sk#15] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [7]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] +Right output [1]: [d_date_sk#15] +Arguments: [ss_sold_date_sk#10], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#15] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(20) CometScan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] +ReadSchema: struct + +(21) CometFilter +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : (isnotnull(cd_demo_sk#17) AND ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) OR ((cd_marital_status#18 = S) AND (cd_education_status#19 = College ))) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )))) + +(22) CometBroadcastExchange +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Right output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [ss_cdemo_sk#1], [cd_demo_sk#17], Inner, ((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))), BuildRight + +(24) CometProject +Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19], [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] + +(25) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#20, hd_dep_count#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] +ReadSchema: struct + +(26) CometFilter +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Condition : (isnotnull(hd_demo_sk#20) AND ((hd_dep_count#21 = 3) OR (hd_dep_count#21 = 1))) + +(27) CometBroadcastExchange +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [hd_demo_sk#20, hd_dep_count#21] + +(28) CometBroadcastHashJoin +Left output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] +Right output [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#20], Inner, (((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#21 = 3)) OR (((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#21 = 1))) OR (((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#21 = 1))), BuildRight + +(29) CometProject +Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20, hd_dep_count#21] +Arguments: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8], [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(30) CometHashAggregate +Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Keys: [] +Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] + +(31) CometExchange +Input [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Keys: [] +Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] + +(33) ColumnarToRow [codegen id : 1] +Input [4]: [avg(ss_quantity)#29, avg(ss_ext_sales_price)#30, avg(ss_ext_wholesale_cost)#31, sum(ss_ext_wholesale_cost)#32] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..dc2f87896a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q13.native_iceberg_compat/simplified.txt @@ -0,0 +1,35 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum,avg(ss_quantity),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost))] + CometExchange #1 + CometHashAggregate [sum,count,sum,count,sum,count,sum,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + CometProject [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + CometBroadcastHashJoin [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status,hd_demo_sk,hd_dep_count] + CometProject [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk,d_date_sk] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk,ca_address_sk,ca_state] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk,s_store_sk] + CometFilter [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometFilter [s_store_sk] + CometScan parquet spark_catalog.default.store [s_store_sk] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #5 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [hd_demo_sk,hd_dep_count] #6 + CometFilter [hd_demo_sk,hd_dep_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/explain.txt new file mode 100644 index 0000000000..c271ba7ee3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/explain.txt @@ -0,0 +1,474 @@ +== Physical Plan == +* ColumnarToRow (72) ++- CometTakeOrderedAndProject (71) + +- CometHashAggregate (70) + +- CometExchange (69) + +- CometHashAggregate (68) + +- CometExpand (67) + +- CometUnion (66) + :- CometProject (57) + : +- CometFilter (56) + : +- CometHashAggregate (55) + : +- CometExchange (54) + : +- CometHashAggregate (53) + : +- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometBroadcastHashJoin (39) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (38) + : : : +- CometProject (37) + : : : +- CometBroadcastHashJoin (36) + : : : :- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (16) + : : : : +- ReusedExchange (27) + : : : +- ReusedExchange (33) + : : +- CometBroadcastExchange (44) + : : +- CometBroadcastHashJoin (43) + : : :- CometFilter (41) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (40) + : : +- ReusedExchange (42) + : +- CometBroadcastExchange (50) + : +- CometProject (49) + : +- CometFilter (48) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (47) + :- CometProject (61) + : +- CometFilter (60) + : +- CometHashAggregate (59) + : +- ReusedExchange (58) + +- CometProject (65) + +- CometFilter (64) + +- CometHashAggregate (63) + +- ReusedExchange (62) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Arguments: [ss_item_sk#9, ss_sold_date_sk#10] + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Arguments: [cs_item_sk#15, cs_sold_date_sk#16] + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21, d_year#22] + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) ReusedExchange [Reuses operator id: 22] +Output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] + +(34) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#27, 0), isnull(i_brand_id#27), coalesce(i_class_id#28, 0), isnull(i_class_id#28), coalesce(i_category_id#29, 0), isnull(i_category_id#29)], LeftSemi, BuildRight + +(35) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(36) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(37) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#30], [i_item_sk#5 AS ss_item_sk#30] + +(38) CometBroadcastExchange +Input [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#1], [ss_item_sk#30], LeftSemi, BuildRight + +(40) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(41) CometFilter +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Condition : isnotnull(i_item_sk#31) + +(42) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#30] + +(43) CometBroadcastHashJoin +Left output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [ss_item_sk#30] +Arguments: [i_item_sk#31], [ss_item_sk#30], LeftSemi, BuildRight + +(44) CometBroadcastExchange +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_item_sk#1], [i_item_sk#31], Inner, BuildRight + +(46) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] + +(47) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#35, d_year#36, d_moy#37] +Arguments: [d_date_sk#35, d_year#36, d_moy#37] + +(48) CometFilter +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2001)) AND (d_moy#37 = 11)) AND isnotnull(d_date_sk#35)) + +(49) CometProject +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Arguments: [d_date_sk#35], [d_date_sk#35] + +(50) CometBroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: [d_date_sk#35] + +(51) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [d_date_sk#35] +Arguments: [ss_sold_date_sk#4], [d_date_sk#35], Inner, BuildRight + +(52) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34, d_date_sk#35] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] + +(53) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(54) CometExchange +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#38, isEmpty#39, count#40] +Arguments: hashpartitioning(i_brand_id#32, i_class_id#33, i_category_id#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometHashAggregate +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#38, isEmpty#39, count#40] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(56) CometFilter +Input [5]: [i_brand_id#32, i_class_id#33, i_category_id#34, sales#41, number_sales#42] +Condition : (isnotnull(sales#41) AND (cast(sales#41 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(57) CometProject +Input [5]: [i_brand_id#32, i_class_id#33, i_category_id#34, sales#41, number_sales#42] +Arguments: [sales#41, number_sales#42, channel#45, i_brand_id#46, i_class_id#47, i_category_id#48], [sales#41, number_sales#42, store AS channel#45, i_brand_id#32 AS i_brand_id#46, i_class_id#33 AS i_class_id#47, i_category_id#34 AS i_category_id#48] + +(58) ReusedExchange [Reuses operator id: 54] +Output [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#52, isEmpty#53, count#54] + +(59) CometHashAggregate +Input [6]: [i_brand_id#49, i_class_id#50, i_category_id#51, sum#52, isEmpty#53, count#54] +Keys [3]: [i_brand_id#49, i_class_id#50, i_category_id#51] +Functions [2]: [sum((cast(cs_quantity#55 as decimal(10,0)) * cs_list_price#56)), count(1)] + +(60) CometFilter +Input [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sales#57, number_sales#58] +Condition : (isnotnull(sales#57) AND (cast(sales#57 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(61) CometProject +Input [5]: [i_brand_id#49, i_class_id#50, i_category_id#51, sales#57, number_sales#58] +Arguments: [sales#57, number_sales#58, channel#59, i_brand_id#49, i_class_id#50, i_category_id#51], [sales#57, number_sales#58, catalog AS channel#59, i_brand_id#49, i_class_id#50, i_category_id#51] + +(62) ReusedExchange [Reuses operator id: 54] +Output [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#63, isEmpty#64, count#65] + +(63) CometHashAggregate +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#63, isEmpty#64, count#65] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum((cast(ws_quantity#66 as decimal(10,0)) * ws_list_price#67)), count(1)] + +(64) CometFilter +Input [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sales#68, number_sales#69] +Condition : (isnotnull(sales#68) AND (cast(sales#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) + +(65) CometProject +Input [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sales#68, number_sales#69] +Arguments: [sales#68, number_sales#69, channel#70, i_brand_id#60, i_class_id#61, i_category_id#62], [sales#68, number_sales#69, web AS channel#70, i_brand_id#60, i_class_id#61, i_category_id#62] + +(66) CometUnion +Child 0 Input [6]: [sales#41, number_sales#42, channel#45, i_brand_id#46, i_class_id#47, i_category_id#48] +Child 1 Input [6]: [sales#57, number_sales#58, channel#59, i_brand_id#49, i_class_id#50, i_category_id#51] +Child 2 Input [6]: [sales#68, number_sales#69, channel#70, i_brand_id#60, i_class_id#61, i_category_id#62] + +(67) CometExpand +Input [6]: [sales#41, number_sales#42, channel#45, i_brand_id#46, i_class_id#47, i_category_id#48] +Arguments: [[sales#41, number_sales#42, channel#45, i_brand_id#46, i_class_id#47, i_category_id#48, 0], [sales#41, number_sales#42, channel#45, i_brand_id#46, i_class_id#47, null, 1], [sales#41, number_sales#42, channel#45, i_brand_id#46, null, null, 3], [sales#41, number_sales#42, channel#45, null, null, null, 7], [sales#41, number_sales#42, null, null, null, null, 15]], [sales#41, number_sales#42, channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75] + +(68) CometHashAggregate +Input [7]: [sales#41, number_sales#42, channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75] +Keys [5]: [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75] +Functions [2]: [partial_sum(sales#41), partial_sum(number_sales#42)] + +(69) CometExchange +Input [8]: [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75, sum#76, isEmpty#77, sum#78] +Arguments: hashpartitioning(channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(70) CometHashAggregate +Input [8]: [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75, sum#76, isEmpty#77, sum#78] +Keys [5]: [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, spark_grouping_id#75] +Functions [2]: [sum(sales#41), sum(number_sales#42)] + +(71) CometTakeOrderedAndProject +Input [6]: [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, sum(sales)#79, sum(number_sales)#80] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[channel#71 ASC NULLS FIRST,i_brand_id#72 ASC NULLS FIRST,i_class_id#73 ASC NULLS FIRST,i_category_id#74 ASC NULLS FIRST], output=[channel#71,i_brand_id#72,i_class_id#73,i_category_id#74,sum(sales)#79,sum(number_sales)#80]), [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, sum(sales)#79, sum(number_sales)#80], 100, [channel#71 ASC NULLS FIRST, i_brand_id#72 ASC NULLS FIRST, i_class_id#73 ASC NULLS FIRST, i_category_id#74 ASC NULLS FIRST], [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, sum(sales)#79, sum(number_sales)#80] + +(72) ColumnarToRow [codegen id : 1] +Input [6]: [channel#71, i_brand_id#72, i_class_id#73, i_category_id#74, sum(sales)#79, sum(number_sales)#80] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 56 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +* ColumnarToRow (89) ++- CometHashAggregate (88) + +- CometExchange (87) + +- CometHashAggregate (86) + +- CometUnion (85) + :- CometProject (76) + : +- CometBroadcastHashJoin (75) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (73) + : +- ReusedExchange (74) + :- CometProject (80) + : +- CometBroadcastHashJoin (79) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (77) + : +- ReusedExchange (78) + +- CometProject (84) + +- CometBroadcastHashJoin (83) + :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (81) + +- ReusedExchange (82) + + +(73) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_quantity#81, ss_list_price#82, ss_sold_date_sk#83] +Arguments: [ss_quantity#81, ss_list_price#82, ss_sold_date_sk#83] + +(74) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#84] + +(75) CometBroadcastHashJoin +Left output [3]: [ss_quantity#81, ss_list_price#82, ss_sold_date_sk#83] +Right output [1]: [d_date_sk#84] +Arguments: [ss_sold_date_sk#83], [d_date_sk#84], Inner, BuildRight + +(76) CometProject +Input [4]: [ss_quantity#81, ss_list_price#82, ss_sold_date_sk#83, d_date_sk#84] +Arguments: [quantity#85, list_price#86], [ss_quantity#81 AS quantity#85, ss_list_price#82 AS list_price#86] + +(77) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_quantity#87, cs_list_price#88, cs_sold_date_sk#89] +Arguments: [cs_quantity#87, cs_list_price#88, cs_sold_date_sk#89] + +(78) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#90] + +(79) CometBroadcastHashJoin +Left output [3]: [cs_quantity#87, cs_list_price#88, cs_sold_date_sk#89] +Right output [1]: [d_date_sk#90] +Arguments: [cs_sold_date_sk#89], [d_date_sk#90], Inner, BuildRight + +(80) CometProject +Input [4]: [cs_quantity#87, cs_list_price#88, cs_sold_date_sk#89, d_date_sk#90] +Arguments: [quantity#91, list_price#92], [cs_quantity#87 AS quantity#91, cs_list_price#88 AS list_price#92] + +(81) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_quantity#93, ws_list_price#94, ws_sold_date_sk#95] +Arguments: [ws_quantity#93, ws_list_price#94, ws_sold_date_sk#95] + +(82) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#96] + +(83) CometBroadcastHashJoin +Left output [3]: [ws_quantity#93, ws_list_price#94, ws_sold_date_sk#95] +Right output [1]: [d_date_sk#96] +Arguments: [ws_sold_date_sk#95], [d_date_sk#96], Inner, BuildRight + +(84) CometProject +Input [4]: [ws_quantity#93, ws_list_price#94, ws_sold_date_sk#95, d_date_sk#96] +Arguments: [quantity#97, list_price#98], [ws_quantity#93 AS quantity#97, ws_list_price#94 AS list_price#98] + +(85) CometUnion +Child 0 Input [2]: [quantity#85, list_price#86] +Child 1 Input [2]: [quantity#91, list_price#92] +Child 2 Input [2]: [quantity#97, list_price#98] + +(86) CometHashAggregate +Input [2]: [quantity#85, list_price#86] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#85 as decimal(10,0)) * list_price#86))] + +(87) CometExchange +Input [2]: [sum#99, count#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(88) CometHashAggregate +Input [2]: [sum#99, count#100] +Keys: [] +Functions [1]: [avg((cast(quantity#85 as decimal(10,0)) * list_price#86))] + +(89) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#101] + +Subquery:2 Hosting operator id = 60 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + +Subquery:3 Hosting operator id = 64 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..12407d32a3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_datafusion/simplified.txt @@ -0,0 +1,96 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales),spark_grouping_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + CometExchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum,sales,number_sales] + CometExpand [channel,i_brand_id,i_class_id,i_category_id] [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] + CometUnion [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometFilter [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #12 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #2 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #3 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #4 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #5 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #6 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #7 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #9 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk] #9 + ReusedExchange [i_brand_id,i_class_id,i_category_id] #7 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #3 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometProject [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometFilter [i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1)] + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #2 + CometProject [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometFilter [i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1)] + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..38a0d1d009 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/explain.txt @@ -0,0 +1,680 @@ +== Physical Plan == +* ColumnarToRow (102) ++- CometTakeOrderedAndProject (101) + +- CometHashAggregate (100) + +- CometExchange (99) + +- CometHashAggregate (98) + +- CometExpand (97) + +- CometUnion (96) + :- CometProject (65) + : +- CometFilter (64) + : +- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (54) + : : +- CometBroadcastHashJoin (53) + : : :- CometBroadcastHashJoin (47) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (46) + : : : +- CometProject (45) + : : : +- CometBroadcastHashJoin (44) + : : : :- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.item (3) + : : : +- CometBroadcastExchange (43) + : : : +- CometBroadcastHashJoin (42) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometScan parquet spark_catalog.default.item (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometScan parquet spark_catalog.default.item (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (16) + : : : : +- ReusedExchange (27) + : : : +- CometBroadcastExchange (41) + : : : +- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (37) + : : : : +- CometBroadcastHashJoin (36) + : : : : :- CometFilter (34) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (33) + : : : : +- ReusedExchange (35) + : : : +- ReusedExchange (38) + : : +- CometBroadcastExchange (52) + : : +- CometBroadcastHashJoin (51) + : : :- CometFilter (49) + : : : +- CometScan parquet spark_catalog.default.item (48) + : : +- ReusedExchange (50) + : +- CometBroadcastExchange (58) + : +- CometProject (57) + : +- CometFilter (56) + : +- CometScan parquet spark_catalog.default.date_dim (55) + :- CometProject (80) + : +- CometFilter (79) + : +- CometHashAggregate (78) + : +- CometExchange (77) + : +- CometHashAggregate (76) + : +- CometProject (75) + : +- CometBroadcastHashJoin (74) + : :- CometProject (72) + : : +- CometBroadcastHashJoin (71) + : : :- CometBroadcastHashJoin (69) + : : : :- CometFilter (67) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (66) + : : : +- ReusedExchange (68) + : : +- ReusedExchange (70) + : +- ReusedExchange (73) + +- CometProject (95) + +- CometFilter (94) + +- CometHashAggregate (93) + +- CometExchange (92) + +- CometHashAggregate (91) + +- CometProject (90) + +- CometBroadcastHashJoin (89) + :- CometProject (87) + : +- CometBroadcastHashJoin (86) + : :- CometBroadcastHashJoin (84) + : : :- CometFilter (82) + : : : +- CometScan parquet spark_catalog.default.web_sales (81) + : : +- ReusedExchange (83) + : +- ReusedExchange (85) + +- ReusedExchange (88) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(35) ReusedExchange [Reuses operator id: 13] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Right output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_item_sk#27], [i_item_sk#29], Inner, BuildRight + +(37) CometProject +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32], [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#33] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Right output [1]: [d_date_sk#33] +Arguments: [ws_sold_date_sk#28], [d_date_sk#33], Inner, BuildRight + +(40) CometProject +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32], [i_brand_id#30, i_class_id#31, i_category_id#32] + +(41) CometBroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32] + +(42) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)], LeftSemi, BuildRight + +(43) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(44) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(45) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#34], [i_item_sk#5 AS ss_item_sk#34] + +(46) CometBroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#34] + +(47) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#1], [ss_item_sk#34], LeftSemi, BuildRight + +(48) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(49) CometFilter +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : isnotnull(i_item_sk#35) + +(50) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#34] + +(51) CometBroadcastHashJoin +Left output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [ss_item_sk#34] +Arguments: [i_item_sk#35], [ss_item_sk#34], LeftSemi, BuildRight + +(52) CometBroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(53) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_item_sk#1], [i_item_sk#35], Inner, BuildRight + +(54) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] + +(55) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#39, d_year#40, d_moy#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(56) CometFilter +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_year#40) AND isnotnull(d_moy#41)) AND (d_year#40 = 2001)) AND (d_moy#41 = 11)) AND isnotnull(d_date_sk#39)) + +(57) CometProject +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Arguments: [d_date_sk#39], [d_date_sk#39] + +(58) CometBroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: [d_date_sk#39] + +(59) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [d_date_sk#39] +Arguments: [ss_sold_date_sk#4], [d_date_sk#39], Inner, BuildRight + +(60) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] + +(61) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(62) CometExchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#42, isEmpty#43, count#44] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(63) CometHashAggregate +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#42, isEmpty#43, count#44] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(64) CometFilter +Input [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sales#45, number_sales#46] +Condition : (isnotnull(sales#45) AND (cast(sales#45 as decimal(32,6)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) + +(65) CometProject +Input [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sales#45, number_sales#46] +Arguments: [sales#45, number_sales#46, channel#49, i_brand_id#50, i_class_id#51, i_category_id#52], [sales#45, number_sales#46, store AS channel#49, i_brand_id#36 AS i_brand_id#50, i_class_id#37 AS i_class_id#51, i_category_id#38 AS i_category_id#52] + +(66) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#53, cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#56)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(67) CometFilter +Input [4]: [cs_item_sk#53, cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56] +Condition : isnotnull(cs_item_sk#53) + +(68) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#57] + +(69) CometBroadcastHashJoin +Left output [4]: [cs_item_sk#53, cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56] +Right output [1]: [ss_item_sk#57] +Arguments: [cs_item_sk#53], [ss_item_sk#57], LeftSemi, BuildRight + +(70) ReusedExchange [Reuses operator id: 52] +Output [4]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] + +(71) CometBroadcastHashJoin +Left output [4]: [cs_item_sk#53, cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56] +Right output [4]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] +Arguments: [cs_item_sk#53], [i_item_sk#58], Inner, BuildRight + +(72) CometProject +Input [8]: [cs_item_sk#53, cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] +Arguments: [cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56, i_brand_id#59, i_class_id#60, i_category_id#61], [cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56, i_brand_id#59, i_class_id#60, i_category_id#61] + +(73) ReusedExchange [Reuses operator id: 58] +Output [1]: [d_date_sk#62] + +(74) CometBroadcastHashJoin +Left output [6]: [cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56, i_brand_id#59, i_class_id#60, i_category_id#61] +Right output [1]: [d_date_sk#62] +Arguments: [cs_sold_date_sk#56], [d_date_sk#62], Inner, BuildRight + +(75) CometProject +Input [7]: [cs_quantity#54, cs_list_price#55, cs_sold_date_sk#56, i_brand_id#59, i_class_id#60, i_category_id#61, d_date_sk#62] +Arguments: [cs_quantity#54, cs_list_price#55, i_brand_id#59, i_class_id#60, i_category_id#61], [cs_quantity#54, cs_list_price#55, i_brand_id#59, i_class_id#60, i_category_id#61] + +(76) CometHashAggregate +Input [5]: [cs_quantity#54, cs_list_price#55, i_brand_id#59, i_class_id#60, i_category_id#61] +Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] +Functions [2]: [partial_sum((cast(cs_quantity#54 as decimal(10,0)) * cs_list_price#55)), partial_count(1)] + +(77) CometExchange +Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#63, isEmpty#64, count#65] +Arguments: hashpartitioning(i_brand_id#59, i_class_id#60, i_category_id#61, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(78) CometHashAggregate +Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#63, isEmpty#64, count#65] +Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] +Functions [2]: [sum((cast(cs_quantity#54 as decimal(10,0)) * cs_list_price#55)), count(1)] + +(79) CometFilter +Input [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sales#66, number_sales#67] +Condition : (isnotnull(sales#66) AND (cast(sales#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) + +(80) CometProject +Input [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sales#66, number_sales#67] +Arguments: [sales#66, number_sales#67, channel#68, i_brand_id#59, i_class_id#60, i_category_id#61], [sales#66, number_sales#67, catalog AS channel#68, i_brand_id#59, i_class_id#60, i_category_id#61] + +(81) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#69, ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#72)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(82) CometFilter +Input [4]: [ws_item_sk#69, ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72] +Condition : isnotnull(ws_item_sk#69) + +(83) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#73] + +(84) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#69, ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72] +Right output [1]: [ss_item_sk#73] +Arguments: [ws_item_sk#69], [ss_item_sk#73], LeftSemi, BuildRight + +(85) ReusedExchange [Reuses operator id: 52] +Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] + +(86) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#69, ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72] +Right output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +Arguments: [ws_item_sk#69], [i_item_sk#74], Inner, BuildRight + +(87) CometProject +Input [8]: [ws_item_sk#69, ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +Arguments: [ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77], [ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77] + +(88) ReusedExchange [Reuses operator id: 58] +Output [1]: [d_date_sk#78] + +(89) CometBroadcastHashJoin +Left output [6]: [ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77] +Right output [1]: [d_date_sk#78] +Arguments: [ws_sold_date_sk#72], [d_date_sk#78], Inner, BuildRight + +(90) CometProject +Input [7]: [ws_quantity#70, ws_list_price#71, ws_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77, d_date_sk#78] +Arguments: [ws_quantity#70, ws_list_price#71, i_brand_id#75, i_class_id#76, i_category_id#77], [ws_quantity#70, ws_list_price#71, i_brand_id#75, i_class_id#76, i_category_id#77] + +(91) CometHashAggregate +Input [5]: [ws_quantity#70, ws_list_price#71, i_brand_id#75, i_class_id#76, i_category_id#77] +Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Functions [2]: [partial_sum((cast(ws_quantity#70 as decimal(10,0)) * ws_list_price#71)), partial_count(1)] + +(92) CometExchange +Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#79, isEmpty#80, count#81] +Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(93) CometHashAggregate +Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#79, isEmpty#80, count#81] +Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Functions [2]: [sum((cast(ws_quantity#70 as decimal(10,0)) * ws_list_price#71)), count(1)] + +(94) CometFilter +Input [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sales#82, number_sales#83] +Condition : (isnotnull(sales#82) AND (cast(sales#82 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) + +(95) CometProject +Input [5]: [i_brand_id#75, i_class_id#76, i_category_id#77, sales#82, number_sales#83] +Arguments: [sales#82, number_sales#83, channel#84, i_brand_id#75, i_class_id#76, i_category_id#77], [sales#82, number_sales#83, web AS channel#84, i_brand_id#75, i_class_id#76, i_category_id#77] + +(96) CometUnion +Child 0 Input [6]: [sales#45, number_sales#46, channel#49, i_brand_id#50, i_class_id#51, i_category_id#52] +Child 1 Input [6]: [sales#66, number_sales#67, channel#68, i_brand_id#59, i_class_id#60, i_category_id#61] +Child 2 Input [6]: [sales#82, number_sales#83, channel#84, i_brand_id#75, i_class_id#76, i_category_id#77] + +(97) CometExpand +Input [6]: [sales#45, number_sales#46, channel#49, i_brand_id#50, i_class_id#51, i_category_id#52] +Arguments: [[sales#45, number_sales#46, channel#49, i_brand_id#50, i_class_id#51, i_category_id#52, 0], [sales#45, number_sales#46, channel#49, i_brand_id#50, i_class_id#51, null, 1], [sales#45, number_sales#46, channel#49, i_brand_id#50, null, null, 3], [sales#45, number_sales#46, channel#49, null, null, null, 7], [sales#45, number_sales#46, null, null, null, null, 15]], [sales#45, number_sales#46, channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89] + +(98) CometHashAggregate +Input [7]: [sales#45, number_sales#46, channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89] +Keys [5]: [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89] +Functions [2]: [partial_sum(sales#45), partial_sum(number_sales#46)] + +(99) CometExchange +Input [8]: [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89, sum#90, isEmpty#91, sum#92] +Arguments: hashpartitioning(channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(100) CometHashAggregate +Input [8]: [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89, sum#90, isEmpty#91, sum#92] +Keys [5]: [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, spark_grouping_id#89] +Functions [2]: [sum(sales#45), sum(number_sales#46)] + +(101) CometTakeOrderedAndProject +Input [6]: [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, sum(sales)#93, sum(number_sales)#94] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[channel#85 ASC NULLS FIRST,i_brand_id#86 ASC NULLS FIRST,i_class_id#87 ASC NULLS FIRST,i_category_id#88 ASC NULLS FIRST], output=[channel#85,i_brand_id#86,i_class_id#87,i_category_id#88,sum(sales)#93,sum(number_sales)#94]), [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, sum(sales)#93, sum(number_sales)#94], 100, [channel#85 ASC NULLS FIRST, i_brand_id#86 ASC NULLS FIRST, i_class_id#87 ASC NULLS FIRST, i_category_id#88 ASC NULLS FIRST], [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, sum(sales)#93, sum(number_sales)#94] + +(102) ColumnarToRow [codegen id : 1] +Input [6]: [channel#85, i_brand_id#86, i_class_id#87, i_category_id#88, sum(sales)#93, sum(number_sales)#94] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#47, [id=#48] +* ColumnarToRow (119) ++- CometHashAggregate (118) + +- CometExchange (117) + +- CometHashAggregate (116) + +- CometUnion (115) + :- CometProject (106) + : +- CometBroadcastHashJoin (105) + : :- CometScan parquet spark_catalog.default.store_sales (103) + : +- ReusedExchange (104) + :- CometProject (110) + : +- CometBroadcastHashJoin (109) + : :- CometScan parquet spark_catalog.default.catalog_sales (107) + : +- ReusedExchange (108) + +- CometProject (114) + +- CometBroadcastHashJoin (113) + :- CometScan parquet spark_catalog.default.web_sales (111) + +- ReusedExchange (112) + + +(103) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#97)] +ReadSchema: struct + +(104) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#98] + +(105) CometBroadcastHashJoin +Left output [3]: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97] +Right output [1]: [d_date_sk#98] +Arguments: [ss_sold_date_sk#97], [d_date_sk#98], Inner, BuildRight + +(106) CometProject +Input [4]: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97, d_date_sk#98] +Arguments: [quantity#99, list_price#100], [ss_quantity#95 AS quantity#99, ss_list_price#96 AS list_price#100] + +(107) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#103)] +ReadSchema: struct + +(108) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#104] + +(109) CometBroadcastHashJoin +Left output [3]: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103] +Right output [1]: [d_date_sk#104] +Arguments: [cs_sold_date_sk#103], [d_date_sk#104], Inner, BuildRight + +(110) CometProject +Input [4]: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103, d_date_sk#104] +Arguments: [quantity#105, list_price#106], [cs_quantity#101 AS quantity#105, cs_list_price#102 AS list_price#106] + +(111) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#109)] +ReadSchema: struct + +(112) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#110] + +(113) CometBroadcastHashJoin +Left output [3]: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109] +Right output [1]: [d_date_sk#110] +Arguments: [ws_sold_date_sk#109], [d_date_sk#110], Inner, BuildRight + +(114) CometProject +Input [4]: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109, d_date_sk#110] +Arguments: [quantity#111, list_price#112], [ws_quantity#107 AS quantity#111, ws_list_price#108 AS list_price#112] + +(115) CometUnion +Child 0 Input [2]: [quantity#99, list_price#100] +Child 1 Input [2]: [quantity#105, list_price#106] +Child 2 Input [2]: [quantity#111, list_price#112] + +(116) CometHashAggregate +Input [2]: [quantity#99, list_price#100] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#99 as decimal(10,0)) * list_price#100))] + +(117) CometExchange +Input [2]: [sum#113, count#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(118) CometHashAggregate +Input [2]: [sum#113, count#114] +Keys: [] +Functions [1]: [avg((cast(quantity#99 as decimal(10,0)) * list_price#100))] + +(119) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#115] + +Subquery:2 Hosting operator id = 79 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#48] + +Subquery:3 Hosting operator id = 94 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#48] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f9ba3516f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14a.native_iceberg_compat/simplified.txt @@ -0,0 +1,126 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales),spark_grouping_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + CometExchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum,sales,number_sales] + CometExpand [channel,i_brand_id,i_class_id,i_category_id] [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] + CometUnion [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometFilter [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #13 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #2 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #3 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #4 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #5 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #6 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #7 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #9 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [d_date_sk] #9 + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #10 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ws_item_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + ReusedExchange [d_date_sk] #9 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #3 + CometBroadcastExchange [d_date_sk] #12 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometProject [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometFilter [i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #14 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,cs_quantity,cs_list_price] + CometProject [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,ss_item_sk] + CometFilter [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [ss_item_sk] #3 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + ReusedExchange [d_date_sk] #12 + CometProject [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + CometFilter [i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #15 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ws_quantity,ws_list_price] + CometProject [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk,ss_item_sk] + CometFilter [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [ss_item_sk] #3 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/explain.txt new file mode 100644 index 0000000000..5d8a967ff0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/explain.txt @@ -0,0 +1,542 @@ +== Physical Plan == +* ColumnarToRow (77) ++- CometTakeOrderedAndProject (76) + +- CometBroadcastHashJoin (75) + :- CometFilter (56) + : +- CometHashAggregate (55) + : +- CometExchange (54) + : +- CometHashAggregate (53) + : +- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometBroadcastHashJoin (39) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (38) + : : : +- CometProject (37) + : : : +- CometBroadcastHashJoin (36) + : : : :- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (16) + : : : : +- ReusedExchange (27) + : : : +- ReusedExchange (33) + : : +- CometBroadcastExchange (44) + : : +- CometBroadcastHashJoin (43) + : : :- CometFilter (41) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (40) + : : +- ReusedExchange (42) + : +- CometBroadcastExchange (50) + : +- CometProject (49) + : +- CometFilter (48) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (47) + +- CometBroadcastExchange (74) + +- CometFilter (73) + +- CometHashAggregate (72) + +- CometExchange (71) + +- CometHashAggregate (70) + +- CometProject (69) + +- CometBroadcastHashJoin (68) + :- CometProject (63) + : +- CometBroadcastHashJoin (62) + : :- CometBroadcastHashJoin (60) + : : :- CometFilter (58) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (57) + : : +- ReusedExchange (59) + : +- ReusedExchange (61) + +- CometBroadcastExchange (67) + +- CometProject (66) + +- CometFilter (65) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (64) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Arguments: [ss_item_sk#9, ss_sold_date_sk#10] + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Arguments: [cs_item_sk#15, cs_sold_date_sk#16] + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21, d_year#22] + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) ReusedExchange [Reuses operator id: 22] +Output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] + +(34) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#27, 0), isnull(i_brand_id#27), coalesce(i_class_id#28, 0), isnull(i_class_id#28), coalesce(i_category_id#29, 0), isnull(i_category_id#29)], LeftSemi, BuildRight + +(35) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(36) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(37) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#30], [i_item_sk#5 AS ss_item_sk#30] + +(38) CometBroadcastExchange +Input [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#1], [ss_item_sk#30], LeftSemi, BuildRight + +(40) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(41) CometFilter +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Condition : (((isnotnull(i_item_sk#31) AND isnotnull(i_brand_id#32)) AND isnotnull(i_class_id#33)) AND isnotnull(i_category_id#34)) + +(42) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#30] + +(43) CometBroadcastHashJoin +Left output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [ss_item_sk#30] +Arguments: [i_item_sk#31], [ss_item_sk#30], LeftSemi, BuildRight + +(44) CometBroadcastExchange +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_item_sk#1], [i_item_sk#31], Inner, BuildRight + +(46) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] + +(47) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#35, d_week_seq#36] +Arguments: [d_date_sk#35, d_week_seq#36] + +(48) CometFilter +Input [2]: [d_date_sk#35, d_week_seq#36] +Condition : ((isnotnull(d_week_seq#36) AND (d_week_seq#36 = Subquery scalar-subquery#37, [id=#38])) AND isnotnull(d_date_sk#35)) + +(49) CometProject +Input [2]: [d_date_sk#35, d_week_seq#36] +Arguments: [d_date_sk#35], [d_date_sk#35] + +(50) CometBroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: [d_date_sk#35] + +(51) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [d_date_sk#35] +Arguments: [ss_sold_date_sk#4], [d_date_sk#35], Inner, BuildRight + +(52) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34, d_date_sk#35] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] + +(53) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(54) CometExchange +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#39, isEmpty#40, count#41] +Arguments: hashpartitioning(i_brand_id#32, i_class_id#33, i_category_id#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometHashAggregate +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#39, isEmpty#40, count#41] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(56) CometFilter +Input [6]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44] +Condition : (isnotnull(sales#43) AND (cast(sales#43 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(57) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Arguments: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] + +(58) CometFilter +Input [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Condition : isnotnull(ss_item_sk#47) + +(59) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#51] + +(60) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Right output [1]: [ss_item_sk#51] +Arguments: [ss_item_sk#47], [ss_item_sk#51], LeftSemi, BuildRight + +(61) ReusedExchange [Reuses operator id: 44] +Output [4]: [i_item_sk#52, i_brand_id#53, i_class_id#54, i_category_id#55] + +(62) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Right output [4]: [i_item_sk#52, i_brand_id#53, i_class_id#54, i_category_id#55] +Arguments: [ss_item_sk#47], [i_item_sk#52], Inner, BuildRight + +(63) CometProject +Input [8]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_item_sk#52, i_brand_id#53, i_class_id#54, i_category_id#55] +Arguments: [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55], [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55] + +(64) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#56, d_week_seq#57] +Arguments: [d_date_sk#56, d_week_seq#57] + +(65) CometFilter +Input [2]: [d_date_sk#56, d_week_seq#57] +Condition : ((isnotnull(d_week_seq#57) AND (d_week_seq#57 = Subquery scalar-subquery#58, [id=#59])) AND isnotnull(d_date_sk#56)) + +(66) CometProject +Input [2]: [d_date_sk#56, d_week_seq#57] +Arguments: [d_date_sk#56], [d_date_sk#56] + +(67) CometBroadcastExchange +Input [1]: [d_date_sk#56] +Arguments: [d_date_sk#56] + +(68) CometBroadcastHashJoin +Left output [6]: [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55] +Right output [1]: [d_date_sk#56] +Arguments: [ss_sold_date_sk#50], [d_date_sk#56], Inner, BuildRight + +(69) CometProject +Input [7]: [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55, d_date_sk#56] +Arguments: [ss_quantity#48, ss_list_price#49, i_brand_id#53, i_class_id#54, i_category_id#55], [ss_quantity#48, ss_list_price#49, i_brand_id#53, i_class_id#54, i_category_id#55] + +(70) CometHashAggregate +Input [5]: [ss_quantity#48, ss_list_price#49, i_brand_id#53, i_class_id#54, i_category_id#55] +Keys [3]: [i_brand_id#53, i_class_id#54, i_category_id#55] +Functions [2]: [partial_sum((cast(ss_quantity#48 as decimal(10,0)) * ss_list_price#49)), partial_count(1)] + +(71) CometExchange +Input [6]: [i_brand_id#53, i_class_id#54, i_category_id#55, sum#60, isEmpty#61, count#62] +Arguments: hashpartitioning(i_brand_id#53, i_class_id#54, i_category_id#55, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(72) CometHashAggregate +Input [6]: [i_brand_id#53, i_class_id#54, i_category_id#55, sum#60, isEmpty#61, count#62] +Keys [3]: [i_brand_id#53, i_class_id#54, i_category_id#55] +Functions [2]: [sum((cast(ss_quantity#48 as decimal(10,0)) * ss_list_price#49)), count(1)] + +(73) CometFilter +Input [6]: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(74) CometBroadcastExchange +Input [6]: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Arguments: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] + +(75) CometBroadcastHashJoin +Left output [6]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44] +Right output [6]: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Arguments: [i_brand_id#32, i_class_id#33, i_category_id#34], [i_brand_id#53, i_class_id#54, i_category_id#55], Inner, BuildRight + +(76) CometTakeOrderedAndProject +Input [12]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_brand_id#32 ASC NULLS FIRST,i_class_id#33 ASC NULLS FIRST,i_category_id#34 ASC NULLS FIRST], output=[channel#42,i_brand_id#32,i_class_id#33,i_category_id#34,sales#43,number_sales#44,channel#63,i_brand_id#53,i_class_id#54,i_category_id#55,sales#64,number_sales#65]), [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65], 100, [i_brand_id#32 ASC NULLS FIRST, i_class_id#33 ASC NULLS FIRST, i_category_id#34 ASC NULLS FIRST], [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] + +(77) ColumnarToRow [codegen id : 1] +Input [12]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 56 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +* ColumnarToRow (94) ++- CometHashAggregate (93) + +- CometExchange (92) + +- CometHashAggregate (91) + +- CometUnion (90) + :- CometProject (81) + : +- CometBroadcastHashJoin (80) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (78) + : +- ReusedExchange (79) + :- CometProject (85) + : +- CometBroadcastHashJoin (84) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (82) + : +- ReusedExchange (83) + +- CometProject (89) + +- CometBroadcastHashJoin (88) + :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (86) + +- ReusedExchange (87) + + +(78) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68] +Arguments: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68] + +(79) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#69] + +(80) CometBroadcastHashJoin +Left output [3]: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68] +Right output [1]: [d_date_sk#69] +Arguments: [ss_sold_date_sk#68], [d_date_sk#69], Inner, BuildRight + +(81) CometProject +Input [4]: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68, d_date_sk#69] +Arguments: [quantity#70, list_price#71], [ss_quantity#66 AS quantity#70, ss_list_price#67 AS list_price#71] + +(82) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74] +Arguments: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74] + +(83) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#75] + +(84) CometBroadcastHashJoin +Left output [3]: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74] +Right output [1]: [d_date_sk#75] +Arguments: [cs_sold_date_sk#74], [d_date_sk#75], Inner, BuildRight + +(85) CometProject +Input [4]: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74, d_date_sk#75] +Arguments: [quantity#76, list_price#77], [cs_quantity#72 AS quantity#76, cs_list_price#73 AS list_price#77] + +(86) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Arguments: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] + +(87) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#81] + +(88) CometBroadcastHashJoin +Left output [3]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Right output [1]: [d_date_sk#81] +Arguments: [ws_sold_date_sk#80], [d_date_sk#81], Inner, BuildRight + +(89) CometProject +Input [4]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80, d_date_sk#81] +Arguments: [quantity#82, list_price#83], [ws_quantity#78 AS quantity#82, ws_list_price#79 AS list_price#83] + +(90) CometUnion +Child 0 Input [2]: [quantity#70, list_price#71] +Child 1 Input [2]: [quantity#76, list_price#77] +Child 2 Input [2]: [quantity#82, list_price#83] + +(91) CometHashAggregate +Input [2]: [quantity#70, list_price#71] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#70 as decimal(10,0)) * list_price#71))] + +(92) CometExchange +Input [2]: [sum#84, count#85] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(93) CometHashAggregate +Input [2]: [sum#84, count#85] +Keys: [] +Functions [1]: [avg((cast(quantity#70 as decimal(10,0)) * list_price#71))] + +(94) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#86] + +Subquery:2 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +* ColumnarToRow (98) ++- CometProject (97) + +- CometFilter (96) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (95) + + +(95) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] +Arguments: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] + +(96) CometFilter +Input [4]: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] +Condition : (((((isnotnull(d_year#88) AND isnotnull(d_moy#89)) AND isnotnull(d_dom#90)) AND (d_year#88 = 2000)) AND (d_moy#89 = 12)) AND (d_dom#90 = 11)) + +(97) CometProject +Input [4]: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] +Arguments: [d_week_seq#87], [d_week_seq#87] + +(98) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#87] + +Subquery:3 Hosting operator id = 73 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] + +Subquery:4 Hosting operator id = 65 Hosting Expression = Subquery scalar-subquery#58, [id=#59] +* ColumnarToRow (102) ++- CometProject (101) + +- CometFilter (100) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (99) + + +(99) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Arguments: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] + +(100) CometFilter +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Condition : (((((isnotnull(d_year#92) AND isnotnull(d_moy#93)) AND isnotnull(d_dom#94)) AND (d_year#92 = 1999)) AND (d_moy#93 = 12)) AND (d_dom#94 = 11)) + +(101) CometProject +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Arguments: [d_week_seq#91], [d_week_seq#91] + +(102) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#91] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d5143df8c8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_datafusion/simplified.txt @@ -0,0 +1,114 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometBroadcastHashJoin [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #11 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #2 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #3 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #4 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #5 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #6 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #8 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk] #8 + ReusedExchange [i_brand_id,i_class_id,i_category_id] #6 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #2 + CometBroadcastExchange [d_date_sk] #10 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] + CometBroadcastExchange [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] #12 + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #13 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [ss_item_sk] #2 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + CometBroadcastExchange [d_date_sk] #14 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..22c4967421 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/explain.txt @@ -0,0 +1,639 @@ +== Physical Plan == +* ColumnarToRow (85) ++- CometTakeOrderedAndProject (84) + +- CometBroadcastHashJoin (83) + :- CometFilter (64) + : +- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (54) + : : +- CometBroadcastHashJoin (53) + : : :- CometBroadcastHashJoin (47) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (46) + : : : +- CometProject (45) + : : : +- CometBroadcastHashJoin (44) + : : : :- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.item (3) + : : : +- CometBroadcastExchange (43) + : : : +- CometBroadcastHashJoin (42) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometScan parquet spark_catalog.default.item (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometScan parquet spark_catalog.default.item (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (16) + : : : : +- ReusedExchange (27) + : : : +- CometBroadcastExchange (41) + : : : +- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (37) + : : : : +- CometBroadcastHashJoin (36) + : : : : :- CometFilter (34) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (33) + : : : : +- ReusedExchange (35) + : : : +- ReusedExchange (38) + : : +- CometBroadcastExchange (52) + : : +- CometBroadcastHashJoin (51) + : : :- CometFilter (49) + : : : +- CometScan parquet spark_catalog.default.item (48) + : : +- ReusedExchange (50) + : +- CometBroadcastExchange (58) + : +- CometProject (57) + : +- CometFilter (56) + : +- CometScan parquet spark_catalog.default.date_dim (55) + +- CometBroadcastExchange (82) + +- CometFilter (81) + +- CometHashAggregate (80) + +- CometExchange (79) + +- CometHashAggregate (78) + +- CometProject (77) + +- CometBroadcastHashJoin (76) + :- CometProject (71) + : +- CometBroadcastHashJoin (70) + : :- CometBroadcastHashJoin (68) + : : :- CometFilter (66) + : : : +- CometScan parquet spark_catalog.default.store_sales (65) + : : +- ReusedExchange (67) + : +- ReusedExchange (69) + +- CometBroadcastExchange (75) + +- CometProject (74) + +- CometFilter (73) + +- CometScan parquet spark_catalog.default.date_dim (72) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(35) ReusedExchange [Reuses operator id: 13] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Right output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_item_sk#27], [i_item_sk#29], Inner, BuildRight + +(37) CometProject +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32], [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#33] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Right output [1]: [d_date_sk#33] +Arguments: [ws_sold_date_sk#28], [d_date_sk#33], Inner, BuildRight + +(40) CometProject +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32], [i_brand_id#30, i_class_id#31, i_category_id#32] + +(41) CometBroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32] + +(42) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)], LeftSemi, BuildRight + +(43) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(44) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(45) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#34], [i_item_sk#5 AS ss_item_sk#34] + +(46) CometBroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#34] + +(47) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#1], [ss_item_sk#34], LeftSemi, BuildRight + +(48) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(49) CometFilter +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : (((isnotnull(i_item_sk#35) AND isnotnull(i_brand_id#36)) AND isnotnull(i_class_id#37)) AND isnotnull(i_category_id#38)) + +(50) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#34] + +(51) CometBroadcastHashJoin +Left output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [ss_item_sk#34] +Arguments: [i_item_sk#35], [ss_item_sk#34], LeftSemi, BuildRight + +(52) CometBroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(53) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_item_sk#1], [i_item_sk#35], Inner, BuildRight + +(54) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] + +(55) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#39, d_week_seq#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(56) CometFilter +Input [2]: [d_date_sk#39, d_week_seq#40] +Condition : ((isnotnull(d_week_seq#40) AND (d_week_seq#40 = Subquery scalar-subquery#41, [id=#42])) AND isnotnull(d_date_sk#39)) + +(57) CometProject +Input [2]: [d_date_sk#39, d_week_seq#40] +Arguments: [d_date_sk#39], [d_date_sk#39] + +(58) CometBroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: [d_date_sk#39] + +(59) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [d_date_sk#39] +Arguments: [ss_sold_date_sk#4], [d_date_sk#39], Inner, BuildRight + +(60) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] + +(61) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(62) CometExchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#43, isEmpty#44, count#45] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(63) CometHashAggregate +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#43, isEmpty#44, count#45] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(64) CometFilter +Input [6]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48] +Condition : (isnotnull(sales#47) AND (cast(sales#47 as decimal(32,6)) > cast(Subquery scalar-subquery#49, [id=#50] as decimal(32,6)))) + +(65) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#54)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(66) CometFilter +Input [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Condition : isnotnull(ss_item_sk#51) + +(67) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#55] + +(68) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Right output [1]: [ss_item_sk#55] +Arguments: [ss_item_sk#51], [ss_item_sk#55], LeftSemi, BuildRight + +(69) ReusedExchange [Reuses operator id: 52] +Output [4]: [i_item_sk#56, i_brand_id#57, i_class_id#58, i_category_id#59] + +(70) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Right output [4]: [i_item_sk#56, i_brand_id#57, i_class_id#58, i_category_id#59] +Arguments: [ss_item_sk#51], [i_item_sk#56], Inner, BuildRight + +(71) CometProject +Input [8]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_item_sk#56, i_brand_id#57, i_class_id#58, i_category_id#59] +Arguments: [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59], [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59] + +(72) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#60, d_week_seq#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(73) CometFilter +Input [2]: [d_date_sk#60, d_week_seq#61] +Condition : ((isnotnull(d_week_seq#61) AND (d_week_seq#61 = Subquery scalar-subquery#62, [id=#63])) AND isnotnull(d_date_sk#60)) + +(74) CometProject +Input [2]: [d_date_sk#60, d_week_seq#61] +Arguments: [d_date_sk#60], [d_date_sk#60] + +(75) CometBroadcastExchange +Input [1]: [d_date_sk#60] +Arguments: [d_date_sk#60] + +(76) CometBroadcastHashJoin +Left output [6]: [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59] +Right output [1]: [d_date_sk#60] +Arguments: [ss_sold_date_sk#54], [d_date_sk#60], Inner, BuildRight + +(77) CometProject +Input [7]: [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59, d_date_sk#60] +Arguments: [ss_quantity#52, ss_list_price#53, i_brand_id#57, i_class_id#58, i_category_id#59], [ss_quantity#52, ss_list_price#53, i_brand_id#57, i_class_id#58, i_category_id#59] + +(78) CometHashAggregate +Input [5]: [ss_quantity#52, ss_list_price#53, i_brand_id#57, i_class_id#58, i_category_id#59] +Keys [3]: [i_brand_id#57, i_class_id#58, i_category_id#59] +Functions [2]: [partial_sum((cast(ss_quantity#52 as decimal(10,0)) * ss_list_price#53)), partial_count(1)] + +(79) CometExchange +Input [6]: [i_brand_id#57, i_class_id#58, i_category_id#59, sum#64, isEmpty#65, count#66] +Arguments: hashpartitioning(i_brand_id#57, i_class_id#58, i_category_id#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(80) CometHashAggregate +Input [6]: [i_brand_id#57, i_class_id#58, i_category_id#59, sum#64, isEmpty#65, count#66] +Keys [3]: [i_brand_id#57, i_class_id#58, i_category_id#59] +Functions [2]: [sum((cast(ss_quantity#52 as decimal(10,0)) * ss_list_price#53)), count(1)] + +(81) CometFilter +Input [6]: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Condition : (isnotnull(sales#68) AND (cast(sales#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#49, [id=#50] as decimal(32,6)))) + +(82) CometBroadcastExchange +Input [6]: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Arguments: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] + +(83) CometBroadcastHashJoin +Left output [6]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48] +Right output [6]: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Arguments: [i_brand_id#36, i_class_id#37, i_category_id#38], [i_brand_id#57, i_class_id#58, i_category_id#59], Inner, BuildRight + +(84) CometTakeOrderedAndProject +Input [12]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_brand_id#36 ASC NULLS FIRST,i_class_id#37 ASC NULLS FIRST,i_category_id#38 ASC NULLS FIRST], output=[channel#46,i_brand_id#36,i_class_id#37,i_category_id#38,sales#47,number_sales#48,channel#67,i_brand_id#57,i_class_id#58,i_category_id#59,sales#68,number_sales#69]), [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69], 100, [i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] + +(85) ColumnarToRow [codegen id : 1] +Input [12]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#49, [id=#50] +* ColumnarToRow (102) ++- CometHashAggregate (101) + +- CometExchange (100) + +- CometHashAggregate (99) + +- CometUnion (98) + :- CometProject (89) + : +- CometBroadcastHashJoin (88) + : :- CometScan parquet spark_catalog.default.store_sales (86) + : +- ReusedExchange (87) + :- CometProject (93) + : +- CometBroadcastHashJoin (92) + : :- CometScan parquet spark_catalog.default.catalog_sales (90) + : +- ReusedExchange (91) + +- CometProject (97) + +- CometBroadcastHashJoin (96) + :- CometScan parquet spark_catalog.default.web_sales (94) + +- ReusedExchange (95) + + +(86) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#70, ss_list_price#71, ss_sold_date_sk#72] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#72)] +ReadSchema: struct + +(87) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#73] + +(88) CometBroadcastHashJoin +Left output [3]: [ss_quantity#70, ss_list_price#71, ss_sold_date_sk#72] +Right output [1]: [d_date_sk#73] +Arguments: [ss_sold_date_sk#72], [d_date_sk#73], Inner, BuildRight + +(89) CometProject +Input [4]: [ss_quantity#70, ss_list_price#71, ss_sold_date_sk#72, d_date_sk#73] +Arguments: [quantity#74, list_price#75], [ss_quantity#70 AS quantity#74, ss_list_price#71 AS list_price#75] + +(90) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#76, cs_list_price#77, cs_sold_date_sk#78] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#78)] +ReadSchema: struct + +(91) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#79] + +(92) CometBroadcastHashJoin +Left output [3]: [cs_quantity#76, cs_list_price#77, cs_sold_date_sk#78] +Right output [1]: [d_date_sk#79] +Arguments: [cs_sold_date_sk#78], [d_date_sk#79], Inner, BuildRight + +(93) CometProject +Input [4]: [cs_quantity#76, cs_list_price#77, cs_sold_date_sk#78, d_date_sk#79] +Arguments: [quantity#80, list_price#81], [cs_quantity#76 AS quantity#80, cs_list_price#77 AS list_price#81] + +(94) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#82, ws_list_price#83, ws_sold_date_sk#84] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#84)] +ReadSchema: struct + +(95) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#85] + +(96) CometBroadcastHashJoin +Left output [3]: [ws_quantity#82, ws_list_price#83, ws_sold_date_sk#84] +Right output [1]: [d_date_sk#85] +Arguments: [ws_sold_date_sk#84], [d_date_sk#85], Inner, BuildRight + +(97) CometProject +Input [4]: [ws_quantity#82, ws_list_price#83, ws_sold_date_sk#84, d_date_sk#85] +Arguments: [quantity#86, list_price#87], [ws_quantity#82 AS quantity#86, ws_list_price#83 AS list_price#87] + +(98) CometUnion +Child 0 Input [2]: [quantity#74, list_price#75] +Child 1 Input [2]: [quantity#80, list_price#81] +Child 2 Input [2]: [quantity#86, list_price#87] + +(99) CometHashAggregate +Input [2]: [quantity#74, list_price#75] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#74 as decimal(10,0)) * list_price#75))] + +(100) CometExchange +Input [2]: [sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(101) CometHashAggregate +Input [2]: [sum#88, count#89] +Keys: [] +Functions [1]: [avg((cast(quantity#74 as decimal(10,0)) * list_price#75))] + +(102) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#90] + +Subquery:2 Hosting operator id = 56 Hosting Expression = Subquery scalar-subquery#41, [id=#42] +* ColumnarToRow (106) ++- CometProject (105) + +- CometFilter (104) + +- CometScan parquet spark_catalog.default.date_dim (103) + + +(103) CometScan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(104) CometFilter +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Condition : (((((isnotnull(d_year#92) AND isnotnull(d_moy#93)) AND isnotnull(d_dom#94)) AND (d_year#92 = 2000)) AND (d_moy#93 = 12)) AND (d_dom#94 = 11)) + +(105) CometProject +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Arguments: [d_week_seq#91], [d_week_seq#91] + +(106) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#91] + +Subquery:3 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#49, [id=#50] + +Subquery:4 Hosting operator id = 73 Hosting Expression = Subquery scalar-subquery#62, [id=#63] +* ColumnarToRow (110) ++- CometProject (109) + +- CometFilter (108) + +- CometScan parquet spark_catalog.default.date_dim (107) + + +(107) CometScan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#95, d_year#96, d_moy#97, d_dom#98] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(108) CometFilter +Input [4]: [d_week_seq#95, d_year#96, d_moy#97, d_dom#98] +Condition : (((((isnotnull(d_year#96) AND isnotnull(d_moy#97)) AND isnotnull(d_dom#98)) AND (d_year#96 = 1999)) AND (d_moy#97 = 12)) AND (d_dom#98 = 11)) + +(109) CometProject +Input [4]: [d_week_seq#95, d_year#96, d_moy#97, d_dom#98] +Arguments: [d_week_seq#95], [d_week_seq#95] + +(110) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#95] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8511334b1a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q14b.native_iceberg_compat/simplified.txt @@ -0,0 +1,122 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometBroadcastHashJoin [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #12 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #2 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #3 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #4 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #5 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #6 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #8 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [d_date_sk] #8 + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #9 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ws_item_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + ReusedExchange [d_date_sk] #8 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #2 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + CometBroadcastExchange [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] #13 + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #14 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [ss_item_sk] #2 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + CometBroadcastExchange [d_date_sk] #15 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/explain.txt new file mode 100644 index 0000000000..9bdb602daf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/explain.txt @@ -0,0 +1,122 @@ +== Physical Plan == +* ColumnarToRow (23) ++- CometTakeOrderedAndProject (22) + +- CometHashAggregate (21) + +- CometExchange (20) + +- CometHashAggregate (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (12) + : +- CometBroadcastHashJoin (11) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (3) + : +- CometBroadcastExchange (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + +- CometBroadcastExchange (16) + +- CometProject (15) + +- CometFilter (14) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Arguments: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_bill_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#4, c_current_addr_sk#5] + +(4) CometFilter +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#4, c_current_addr_sk#5] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Right output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, c_customer_sk#4, c_current_addr_sk#5] +Arguments: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5], [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(9) CometFilter +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Condition : isnotnull(ca_address_sk#6) + +(10) CometBroadcastExchange +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(11) CometBroadcastHashJoin +Left output [3]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] +Right output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [c_current_addr_sk#5], [ca_address_sk#6], Inner, ((substr(ca_zip#8, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#7 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)), BuildRight + +(12) CometProject +Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5, ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8], [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Arguments: [d_date_sk#9, d_year#10, d_qoy#11] + +(14) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_qoy#11) AND isnotnull(d_year#10)) AND (d_qoy#11 = 2)) AND (d_year#10 = 2001)) AND isnotnull(d_date_sk#9)) + +(15) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(17) CometBroadcastHashJoin +Left output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] +Right output [1]: [d_date_sk#9] +Arguments: [cs_sold_date_sk#3], [d_date_sk#9], Inner, BuildRight + +(18) CometProject +Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8, d_date_sk#9] +Arguments: [cs_sales_price#2, ca_zip#8], [cs_sales_price#2, ca_zip#8] + +(19) CometHashAggregate +Input [2]: [cs_sales_price#2, ca_zip#8] +Keys [1]: [ca_zip#8] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] + +(20) CometExchange +Input [2]: [ca_zip#8, sum#12] +Arguments: hashpartitioning(ca_zip#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [2]: [ca_zip#8, sum#12] +Keys [1]: [ca_zip#8] +Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] + +(22) CometTakeOrderedAndProject +Input [2]: [ca_zip#8, sum(cs_sales_price)#13] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_zip#8 ASC NULLS FIRST], output=[ca_zip#8,sum(cs_sales_price)#13]), [ca_zip#8, sum(cs_sales_price)#13], 100, [ca_zip#8 ASC NULLS FIRST], [ca_zip#8, sum(cs_sales_price)#13] + +(23) ColumnarToRow [codegen id : 1] +Input [2]: [ca_zip#8, sum(cs_sales_price)#13] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/simplified.txt new file mode 100644 index 0000000000..df01bd6848 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_datafusion/simplified.txt @@ -0,0 +1,25 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + CometHashAggregate [ca_zip,sum(cs_sales_price),sum,sum(UnscaledValue(cs_sales_price))] + CometExchange [ca_zip] #1 + CometHashAggregate [ca_zip,sum,cs_sales_price] + CometProject [cs_sales_price,ca_zip] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,ca_zip,d_date_sk] + CometProject [cs_sales_price,cs_sold_date_sk,ca_zip] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,c_current_addr_sk,ca_address_sk,ca_state,ca_zip] + CometProject [cs_sales_price,cs_sold_date_sk,c_current_addr_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,c_customer_sk,c_current_addr_sk] + CometFilter [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_state,ca_zip] #3 + CometFilter [ca_address_sk,ca_state,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_zip] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..955232ed4f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/explain.txt @@ -0,0 +1,135 @@ +== Physical Plan == +* ColumnarToRow (23) ++- CometTakeOrderedAndProject (22) + +- CometHashAggregate (21) + +- CometExchange (20) + +- CometHashAggregate (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (12) + : +- CometBroadcastHashJoin (11) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.customer (3) + : +- CometBroadcastExchange (10) + : +- CometFilter (9) + : +- CometScan parquet spark_catalog.default.customer_address (8) + +- CometBroadcastExchange (16) + +- CometProject (15) + +- CometFilter (14) + +- CometScan parquet spark_catalog.default.date_dim (13) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_bill_customer_sk#1) + +(3) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#4, c_current_addr_sk#5] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Right output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, c_customer_sk#4, c_current_addr_sk#5] +Arguments: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5], [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] + +(8) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Condition : isnotnull(ca_address_sk#6) + +(10) CometBroadcastExchange +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(11) CometBroadcastHashJoin +Left output [3]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] +Right output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [c_current_addr_sk#5], [ca_address_sk#6], Inner, ((substr(ca_zip#8, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#7 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)), BuildRight + +(12) CometProject +Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5, ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8], [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_qoy#11) AND isnotnull(d_year#10)) AND (d_qoy#11 = 2)) AND (d_year#10 = 2001)) AND isnotnull(d_date_sk#9)) + +(15) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(17) CometBroadcastHashJoin +Left output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] +Right output [1]: [d_date_sk#9] +Arguments: [cs_sold_date_sk#3], [d_date_sk#9], Inner, BuildRight + +(18) CometProject +Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8, d_date_sk#9] +Arguments: [cs_sales_price#2, ca_zip#8], [cs_sales_price#2, ca_zip#8] + +(19) CometHashAggregate +Input [2]: [cs_sales_price#2, ca_zip#8] +Keys [1]: [ca_zip#8] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] + +(20) CometExchange +Input [2]: [ca_zip#8, sum#12] +Arguments: hashpartitioning(ca_zip#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [2]: [ca_zip#8, sum#12] +Keys [1]: [ca_zip#8] +Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] + +(22) CometTakeOrderedAndProject +Input [2]: [ca_zip#8, sum(cs_sales_price)#13] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_zip#8 ASC NULLS FIRST], output=[ca_zip#8,sum(cs_sales_price)#13]), [ca_zip#8, sum(cs_sales_price)#13], 100, [ca_zip#8 ASC NULLS FIRST], [ca_zip#8, sum(cs_sales_price)#13] + +(23) ColumnarToRow [codegen id : 1] +Input [2]: [ca_zip#8, sum(cs_sales_price)#13] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..eecab46601 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q15.native_iceberg_compat/simplified.txt @@ -0,0 +1,25 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + CometHashAggregate [ca_zip,sum(cs_sales_price),sum,sum(UnscaledValue(cs_sales_price))] + CometExchange [ca_zip] #1 + CometHashAggregate [ca_zip,sum,cs_sales_price] + CometProject [cs_sales_price,ca_zip] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,ca_zip,d_date_sk] + CometProject [cs_sales_price,cs_sold_date_sk,ca_zip] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,c_current_addr_sk,ca_address_sk,ca_state,ca_zip] + CometProject [cs_sales_price,cs_sold_date_sk,c_current_addr_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,c_customer_sk,c_current_addr_sk] + CometFilter [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_state,ca_zip] #3 + CometFilter [ca_address_sk,ca_state,ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_zip] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/explain.txt new file mode 100644 index 0000000000..3fd94211dd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/explain.txt @@ -0,0 +1,217 @@ +== Physical Plan == +* HashAggregate (40) ++- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- * ColumnarToRow (36) + +- CometHashAggregate (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometSortMergeJoin (16) + : : : :- CometProject (11) + : : : : +- CometSortMergeJoin (10) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometSort (9) + : : : : +- CometExchange (8) + : : : : +- CometProject (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (6) + : : : +- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (12) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometFilter (18) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (17) + : +- CometBroadcastExchange (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (23) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`call_center` (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(3) CometProject +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(4) CometExchange +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_order_number#5 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Arguments: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] + +(7) CometProject +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Arguments: [cs_warehouse_sk#9, cs_order_number#10], [cs_warehouse_sk#9, cs_order_number#10] + +(8) CometExchange +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_warehouse_sk#9, cs_order_number#10], [cs_order_number#10 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#5], [cs_order_number#10], LeftSemi, NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) + +(11) CometProject +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(12) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] +Arguments: [cr_order_number#12, cr_returned_date_sk#13] + +(13) CometProject +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] +Arguments: [cr_order_number#12], [cr_order_number#12] + +(14) CometExchange +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(15) CometSort +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12], [cr_order_number#12 ASC NULLS FIRST] + +(16) CometSortMergeJoin +Left output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [cr_order_number#12] +Arguments: [cs_order_number#5], [cr_order_number#12], LeftAnti + +(17) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14, d_date#15] + +(18) CometFilter +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2002-02-01)) AND (d_date#15 <= 2002-04-02)) AND isnotnull(d_date_sk#14)) + +(19) CometProject +Input [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14], [d_date_sk#14] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [d_date_sk#14] +Arguments: [cs_ship_date_sk#1], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#14] +Arguments: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(23) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16, ca_state#17] + +(24) CometFilter +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(25) CometProject +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(26) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(27) CometBroadcastHashJoin +Left output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [ca_address_sk#16] +Arguments: [cs_ship_addr_sk#2], [ca_address_sk#16], Inner, BuildRight + +(28) CometProject +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] +Arguments: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(29) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [2]: [cc_call_center_sk#18, cc_county#19] +Arguments: [cc_call_center_sk#18, cc_county#19] + +(30) CometFilter +Input [2]: [cc_call_center_sk#18, cc_county#19] +Condition : ((isnotnull(cc_county#19) AND (cc_county#19 = Williamson County)) AND isnotnull(cc_call_center_sk#18)) + +(31) CometProject +Input [2]: [cc_call_center_sk#18, cc_county#19] +Arguments: [cc_call_center_sk#18], [cc_call_center_sk#18] + +(32) CometBroadcastExchange +Input [1]: [cc_call_center_sk#18] +Arguments: [cc_call_center_sk#18] + +(33) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [cc_call_center_sk#18] +Arguments: [cs_call_center_sk#3], [cc_call_center_sk#18], Inner, BuildRight + +(34) CometProject +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#18] +Arguments: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(35) CometHashAggregate +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] + +(36) ColumnarToRow [codegen id : 1] +Input [3]: [cs_order_number#5, sum#20, sum#21] + +(37) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, sum#20, sum#21] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] +Results [3]: [cs_order_number#5, sum#20, sum#21] + +(38) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, sum#20, sum#21] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#24] +Results [3]: [sum#20, sum#21, count#25] + +(39) Exchange +Input [3]: [sum#20, sum#21, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8a0dd1344a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [cs_order_number,sum,sum,cs_ext_ship_cost,cs_net_profit] + CometProject [cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cc_call_center_sk] + CometProject [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,ca_address_sk] + CometProject [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,d_date_sk] + CometSortMergeJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cr_order_number] + CometProject [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometSortMergeJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_warehouse_sk] + CometSort [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometExchange [cs_order_number] #2 + CometProject [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometFilter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + CometSort [cs_warehouse_sk,cs_order_number] + CometExchange [cs_order_number] #3 + CometProject [cs_warehouse_sk,cs_order_number] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_warehouse_sk,cs_order_number,cs_sold_date_sk] + CometSort [cr_order_number] + CometExchange [cr_order_number] #4 + CometProject [cr_order_number] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_order_number,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [cc_call_center_sk] #7 + CometProject [cc_call_center_sk] + CometFilter [cc_call_center_sk,cc_county] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_county] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..acd12b2771 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/explain.txt @@ -0,0 +1,233 @@ +== Physical Plan == +* HashAggregate (40) ++- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- * ColumnarToRow (36) + +- CometHashAggregate (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometSortMergeJoin (16) + : : : :- CometProject (11) + : : : : +- CometSortMergeJoin (10) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : +- CometSort (9) + : : : : +- CometExchange (8) + : : : : +- CometProject (7) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (6) + : : : +- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometScan parquet spark_catalog.default.catalog_returns (12) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometFilter (18) + : : +- CometScan parquet spark_catalog.default.date_dim (17) + : +- CometBroadcastExchange (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometScan parquet spark_catalog.default.customer_address (23) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometScan parquet spark_catalog.default.call_center (29) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(3) CometProject +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(4) CometExchange +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_order_number#5 ASC NULLS FIRST] + +(6) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +ReadSchema: struct + +(7) CometProject +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Arguments: [cs_warehouse_sk#9, cs_order_number#10], [cs_warehouse_sk#9, cs_order_number#10] + +(8) CometExchange +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_warehouse_sk#9, cs_order_number#10], [cs_order_number#10 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#5], [cs_order_number#10], LeftSemi, NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) + +(11) CometProject +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(12) CometScan parquet spark_catalog.default.catalog_returns +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +ReadSchema: struct + +(13) CometProject +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] +Arguments: [cr_order_number#12], [cr_order_number#12] + +(14) CometExchange +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(15) CometSort +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12], [cr_order_number#12 ASC NULLS FIRST] + +(16) CometSortMergeJoin +Left output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [cr_order_number#12] +Arguments: [cs_order_number#5], [cr_order_number#12], LeftAnti + +(17) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#14, d_date#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) CometFilter +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2002-02-01)) AND (d_date#15 <= 2002-04-02)) AND isnotnull(d_date_sk#14)) + +(19) CometProject +Input [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14], [d_date_sk#14] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [d_date_sk#14] +Arguments: [cs_ship_date_sk#1], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#14] +Arguments: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(23) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) CometFilter +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(25) CometProject +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(26) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(27) CometBroadcastHashJoin +Left output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [ca_address_sk#16] +Arguments: [cs_ship_addr_sk#2], [ca_address_sk#16], Inner, BuildRight + +(28) CometProject +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] +Arguments: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(29) CometScan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#18, cc_county#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [cc_call_center_sk#18, cc_county#19] +Condition : ((isnotnull(cc_county#19) AND (cc_county#19 = Williamson County)) AND isnotnull(cc_call_center_sk#18)) + +(31) CometProject +Input [2]: [cc_call_center_sk#18, cc_county#19] +Arguments: [cc_call_center_sk#18], [cc_call_center_sk#18] + +(32) CometBroadcastExchange +Input [1]: [cc_call_center_sk#18] +Arguments: [cc_call_center_sk#18] + +(33) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [cc_call_center_sk#18] +Arguments: [cs_call_center_sk#3], [cc_call_center_sk#18], Inner, BuildRight + +(34) CometProject +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#18] +Arguments: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(35) CometHashAggregate +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] + +(36) ColumnarToRow [codegen id : 1] +Input [3]: [cs_order_number#5, sum#20, sum#21] + +(37) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, sum#20, sum#21] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] +Results [3]: [cs_order_number#5, sum#20, sum#21] + +(38) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, sum#20, sum#21] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#24] +Results [3]: [sum#20, sum#21, count#25] + +(39) Exchange +Input [3]: [sum#20, sum#21, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f054ee03e9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q16.native_iceberg_compat/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [cs_order_number,sum,sum,cs_ext_ship_cost,cs_net_profit] + CometProject [cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cc_call_center_sk] + CometProject [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,ca_address_sk] + CometProject [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,d_date_sk] + CometSortMergeJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cr_order_number] + CometProject [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometSortMergeJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_warehouse_sk] + CometSort [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometExchange [cs_order_number] #2 + CometProject [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometFilter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + CometSort [cs_warehouse_sk,cs_order_number] + CometExchange [cs_order_number] #3 + CometProject [cs_warehouse_sk,cs_order_number] + CometScan parquet spark_catalog.default.catalog_sales [cs_warehouse_sk,cs_order_number,cs_sold_date_sk] + CometSort [cr_order_number] + CometExchange [cr_order_number] #4 + CometProject [cr_order_number] + CometScan parquet spark_catalog.default.catalog_returns [cr_order_number,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [cc_call_center_sk] #7 + CometProject [cc_call_center_sk] + CometFilter [cc_call_center_sk,cc_county] + CometScan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_county] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/explain.txt new file mode 100644 index 0000000000..8d26f05153 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/explain.txt @@ -0,0 +1,220 @@ +== Physical Plan == +* ColumnarToRow (42) ++- CometTakeOrderedAndProject (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : +- ReusedExchange (25) + : +- CometBroadcastExchange (30) + : +- CometFilter (29) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (28) + +- CometBroadcastExchange (35) + +- CometFilter (34) + +- CometNativeScan: `spark_catalog`.`default`.`item` (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#16, d_quarter_name#17] +Arguments: [d_date_sk#16, d_quarter_name#17] + +(14) CometFilter +Input [2]: [d_date_sk#16, d_quarter_name#17] +Condition : ((isnotnull(d_quarter_name#17) AND (d_quarter_name#17 = 2001Q1)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [2]: [d_date_sk#16, d_quarter_name#17] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#18, d_quarter_name#19] +Arguments: [d_date_sk#18, d_quarter_name#19] + +(20) CometFilter +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : (d_quarter_name#19 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#18)) + +(21) CometProject +Input [2]: [d_date_sk#18, d_quarter_name#19] +Arguments: [d_date_sk#18], [d_date_sk#18] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: [d_date_sk#18] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#18] +Arguments: [sr_returned_date_sk#11], [d_date_sk#18], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#18] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] + +(25) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#20] + +(26) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#20] +Arguments: [cs_sold_date_sk#15], [d_date_sk#20], Inner, BuildRight + +(27) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#20] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] + +(28) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#21, s_state#22] +Arguments: [s_store_sk#21, s_state#22] + +(29) CometFilter +Input [2]: [s_store_sk#21, s_state#22] +Condition : isnotnull(s_store_sk#21) + +(30) CometBroadcastExchange +Input [2]: [s_store_sk#21, s_state#22] +Arguments: [s_store_sk#21, s_state#22] + +(31) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Right output [2]: [s_store_sk#21, s_state#22] +Arguments: [ss_store_sk#3], [s_store_sk#21], Inner, BuildRight + +(32) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#21, s_state#22] +Arguments: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22], [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] + +(33) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(34) CometFilter +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Condition : isnotnull(i_item_sk#23) + +(35) CometBroadcastExchange +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(36) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] +Right output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [ss_item_sk#1], [i_item_sk#23], Inner, BuildRight + +(37) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25], [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] + +(38) CometHashAggregate +Input [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#10), partial_avg(sr_return_quantity#10), partial_stddev_samp(cast(sr_return_quantity#10 as double)), partial_count(cs_quantity#14), partial_avg(cs_quantity#14), partial_stddev_samp(cast(cs_quantity#14 as double))] + +(39) CometExchange +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Arguments: hashpartitioning(i_item_id#24, i_item_desc#25, s_state#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(40) CometHashAggregate +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#10), avg(sr_return_quantity#10), stddev_samp(cast(sr_return_quantity#10 as double)), count(cs_quantity#14), avg(cs_quantity#14), stddev_samp(cast(cs_quantity#14 as double))] + +(41) CometTakeOrderedAndProject +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#24 ASC NULLS FIRST,i_item_desc#25 ASC NULLS FIRST,s_state#22 ASC NULLS FIRST], output=[i_item_id#24,i_item_desc#25,s_state#22,store_sales_quantitycount#44,store_sales_quantityave#45,store_sales_quantitystdev#46,store_sales_quantitycov#47,as_store_returns_quantitycount#48,as_store_returns_quantityave#49,as_store_returns_quantitystdev#50,store_returns_quantitycov#51,catalog_sales_quantitycount#52,catalog_sales_quantityave#53,catalog_sales_quantitystdev#54,catalog_sales_quantitycov#55]), [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55], 100, [i_item_id#24 ASC NULLS FIRST, i_item_desc#25 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST], [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55] + +(42) ColumnarToRow [codegen id : 1] +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/simplified.txt new file mode 100644 index 0000000000..746826546e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] + CometHashAggregate [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count(ss_quantity),avg(ss_quantity),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(sr_return_quantity),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cs_quantity),stddev_samp(cast(cs_quantity as double))] + CometExchange [i_item_id,i_item_desc,s_state] #1 + CometHashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,ss_quantity,sr_return_quantity,cs_quantity] + CometProject [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_quarter_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_quarter_name] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_quarter_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_quarter_name] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [s_store_sk,s_state] #6 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc] #7 + CometFilter [i_item_sk,i_item_id,i_item_desc] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..14f9b20c46 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/explain.txt @@ -0,0 +1,244 @@ +== Physical Plan == +* ColumnarToRow (42) ++- CometTakeOrderedAndProject (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.store_returns (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometScan parquet spark_catalog.default.date_dim (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometScan parquet spark_catalog.default.date_dim (19) + : : +- ReusedExchange (25) + : +- CometBroadcastExchange (30) + : +- CometFilter (29) + : +- CometScan parquet spark_catalog.default.store (28) + +- CometBroadcastExchange (35) + +- CometFilter (34) + +- CometScan parquet spark_catalog.default.item (33) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#16, d_quarter_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [d_date_sk#16, d_quarter_name#17] +Condition : ((isnotnull(d_quarter_name#17) AND (d_quarter_name#17 = 2001Q1)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [2]: [d_date_sk#16, d_quarter_name#17] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(19) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#18, d_quarter_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(20) CometFilter +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : (d_quarter_name#19 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#18)) + +(21) CometProject +Input [2]: [d_date_sk#18, d_quarter_name#19] +Arguments: [d_date_sk#18], [d_date_sk#18] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: [d_date_sk#18] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#18] +Arguments: [sr_returned_date_sk#11], [d_date_sk#18], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#18] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] + +(25) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#20] + +(26) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#20] +Arguments: [cs_sold_date_sk#15], [d_date_sk#20], Inner, BuildRight + +(27) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#20] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] + +(28) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#21, s_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(29) CometFilter +Input [2]: [s_store_sk#21, s_state#22] +Condition : isnotnull(s_store_sk#21) + +(30) CometBroadcastExchange +Input [2]: [s_store_sk#21, s_state#22] +Arguments: [s_store_sk#21, s_state#22] + +(31) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Right output [2]: [s_store_sk#21, s_state#22] +Arguments: [ss_store_sk#3], [s_store_sk#21], Inner, BuildRight + +(32) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#21, s_state#22] +Arguments: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22], [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] + +(33) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Condition : isnotnull(i_item_sk#23) + +(35) CometBroadcastExchange +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(36) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] +Right output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [ss_item_sk#1], [i_item_sk#23], Inner, BuildRight + +(37) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25], [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] + +(38) CometHashAggregate +Input [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#10), partial_avg(sr_return_quantity#10), partial_stddev_samp(cast(sr_return_quantity#10 as double)), partial_count(cs_quantity#14), partial_avg(cs_quantity#14), partial_stddev_samp(cast(cs_quantity#14 as double))] + +(39) CometExchange +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Arguments: hashpartitioning(i_item_id#24, i_item_desc#25, s_state#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(40) CometHashAggregate +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#10), avg(sr_return_quantity#10), stddev_samp(cast(sr_return_quantity#10 as double)), count(cs_quantity#14), avg(cs_quantity#14), stddev_samp(cast(cs_quantity#14 as double))] + +(41) CometTakeOrderedAndProject +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#24 ASC NULLS FIRST,i_item_desc#25 ASC NULLS FIRST,s_state#22 ASC NULLS FIRST], output=[i_item_id#24,i_item_desc#25,s_state#22,store_sales_quantitycount#44,store_sales_quantityave#45,store_sales_quantitystdev#46,store_sales_quantitycov#47,as_store_returns_quantitycount#48,as_store_returns_quantityave#49,as_store_returns_quantitystdev#50,store_returns_quantitycov#51,catalog_sales_quantitycount#52,catalog_sales_quantityave#53,catalog_sales_quantitystdev#54,catalog_sales_quantitycov#55]), [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55], 100, [i_item_id#24 ASC NULLS FIRST, i_item_desc#25 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST], [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55] + +(42) ColumnarToRow [codegen id : 1] +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#44, store_sales_quantityave#45, store_sales_quantitystdev#46, store_sales_quantitycov#47, as_store_returns_quantitycount#48, as_store_returns_quantityave#49, as_store_returns_quantitystdev#50, store_returns_quantitycov#51, catalog_sales_quantitycount#52, catalog_sales_quantityave#53, catalog_sales_quantitystdev#54, catalog_sales_quantitycov#55] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..403bd6df87 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q17.native_iceberg_compat/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] + CometHashAggregate [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count(ss_quantity),avg(ss_quantity),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(sr_return_quantity),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cs_quantity),stddev_samp(cast(cs_quantity as double))] + CometExchange [i_item_id,i_item_desc,s_state] #1 + CometHashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,ss_quantity,sr_return_quantity,cs_quantity] + CometProject [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_quarter_name] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_quarter_name] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_quarter_name] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_quarter_name] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [s_store_sk,s_state] #6 + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc] #7 + CometFilter [i_item_sk,i_item_id,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/explain.txt new file mode 100644 index 0000000000..c4203a3d92 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/explain.txt @@ -0,0 +1,215 @@ +== Physical Plan == +* ColumnarToRow (41) ++- CometTakeOrderedAndProject (40) + +- CometHashAggregate (39) + +- CometExchange (38) + +- CometHashAggregate (37) + +- CometExpand (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (30) + : +- CometBroadcastHashJoin (29) + : :- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (19) + : : : +- CometBroadcastHashJoin (18) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : : : +- CometBroadcastExchange (17) + : : : +- CometFilter (16) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (15) + : : +- CometBroadcastExchange (22) + : : +- CometFilter (21) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (20) + : +- CometBroadcastExchange (28) + : +- CometProject (27) + : +- CometFilter (26) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (25) + +- CometBroadcastExchange (33) + +- CometFilter (32) + +- CometNativeScan: `spark_catalog`.`default`.`item` (31) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Arguments: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(2) CometFilter +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(4) CometFilter +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown )) AND isnotnull(cd_demo_sk#10)) + +(5) CometProject +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13], [cd_demo_sk#10, cd_dep_count#13] + +(6) CometBroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13] + +(7) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Right output [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#10], Inner, BuildRight + +(8) CometProject +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13], [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(10) CometFilter +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(11) CometProject +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(12) CometBroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(13) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Right output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#14], Inner, BuildRight + +(14) CometProject +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(15) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(16) CometFilter +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(17) CometBroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(18) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Right output [1]: [cd_demo_sk#19] +Arguments: [c_current_cdemo_sk#15], [cd_demo_sk#19], Inner, BuildRight + +(19) CometProject +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(21) CometFilter +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#20)) + +(22) CometBroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(23) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Right output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [c_current_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(24) CometProject +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(25) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24, d_year#25] + +(26) CometFilter +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1998)) AND isnotnull(d_date_sk#24)) + +(27) CometProject +Input [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24], [d_date_sk#24] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: [d_date_sk#24] + +(29) CometBroadcastHashJoin +Left output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [1]: [d_date_sk#24] +Arguments: [cs_sold_date_sk#9], [d_date_sk#24], Inner, BuildRight + +(30) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(31) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(32) CometFilter +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(33) CometBroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(34) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [cs_item_sk#3], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] +Arguments: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] + +(36) CometExpand +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] + +(37) CometHashAggregate +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] + +(38) CometExchange +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(39) CometHashAggregate +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] + +(40) CometTakeOrderedAndProject +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_country#29 ASC NULLS FIRST,ca_state#30 ASC NULLS FIRST,ca_county#31 ASC NULLS FIRST,i_item_id#28 ASC NULLS FIRST], output=[i_item_id#28,ca_country#29,ca_state#30,ca_county#31,agg1#47,agg2#48,agg3#49,agg4#50,agg5#51,agg6#52,agg7#53]), [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53], 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53] + +(41) ColumnarToRow [codegen id : 1] +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e795eab5b8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_datafusion/simplified.txt @@ -0,0 +1,43 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2)))] + CometExchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + CometExpand [i_item_id,ca_country,ca_state,ca_county] [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] + CometProject [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_county,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk,cd_dep_count] #2 + CometProject [cd_demo_sk,cd_dep_count] + CometFilter [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + CometProject [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometBroadcastExchange [cd_demo_sk] #4 + CometFilter [cd_demo_sk] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state,ca_country] #5 + CometFilter [ca_address_sk,ca_county,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #7 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..36cd51bb51 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/explain.txt @@ -0,0 +1,237 @@ +== Physical Plan == +* ColumnarToRow (41) ++- CometTakeOrderedAndProject (40) + +- CometHashAggregate (39) + +- CometExchange (38) + +- CometHashAggregate (37) + +- CometExpand (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (30) + : +- CometBroadcastHashJoin (29) + : :- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (19) + : : : +- CometBroadcastHashJoin (18) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometScan parquet spark_catalog.default.customer_demographics (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometScan parquet spark_catalog.default.customer (9) + : : : +- CometBroadcastExchange (17) + : : : +- CometFilter (16) + : : : +- CometScan parquet spark_catalog.default.customer_demographics (15) + : : +- CometBroadcastExchange (22) + : : +- CometFilter (21) + : : +- CometScan parquet spark_catalog.default.customer_address (20) + : +- CometBroadcastExchange (28) + : +- CometProject (27) + : +- CometFilter (26) + : +- CometScan parquet spark_catalog.default.date_dim (25) + +- CometBroadcastExchange (33) + +- CometFilter (32) + +- CometScan parquet spark_catalog.default.item (31) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(3) CometScan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown )) AND isnotnull(cd_demo_sk#10)) + +(5) CometProject +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13], [cd_demo_sk#10, cd_dep_count#13] + +(6) CometBroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13] + +(7) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Right output [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#10], Inner, BuildRight + +(8) CometProject +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13], [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] + +(9) CometScan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,12,2,6,8,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(10) CometFilter +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(11) CometProject +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(12) CometBroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(13) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Right output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#14], Inner, BuildRight + +(14) CometProject +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(15) CometScan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(17) CometBroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(18) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Right output [1]: [cd_demo_sk#19] +Arguments: [c_current_cdemo_sk#15], [cd_demo_sk#19], Inner, BuildRight + +(19) CometProject +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] + +(20) CometScan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [IN,MS,ND,NM,OK,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(21) CometFilter +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#20)) + +(22) CometBroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(23) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Right output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [c_current_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(24) CometProject +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(25) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#24, d_year#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(26) CometFilter +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1998)) AND isnotnull(d_date_sk#24)) + +(27) CometProject +Input [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24], [d_date_sk#24] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: [d_date_sk#24] + +(29) CometBroadcastHashJoin +Left output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [1]: [d_date_sk#24] +Arguments: [cs_sold_date_sk#9], [d_date_sk#24], Inner, BuildRight + +(30) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(31) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#26, i_item_id#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(32) CometFilter +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(33) CometBroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(34) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [cs_item_sk#3], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] +Arguments: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] + +(36) CometExpand +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] + +(37) CometHashAggregate +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] + +(38) CometExchange +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(39) CometHashAggregate +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] + +(40) CometTakeOrderedAndProject +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_country#29 ASC NULLS FIRST,ca_state#30 ASC NULLS FIRST,ca_county#31 ASC NULLS FIRST,i_item_id#28 ASC NULLS FIRST], output=[i_item_id#28,ca_country#29,ca_state#30,ca_county#31,agg1#47,agg2#48,agg3#49,agg4#50,agg5#51,agg6#52,agg7#53]), [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53], 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53] + +(41) ColumnarToRow [codegen id : 1] +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#47, agg2#48, agg3#49, agg4#50, agg5#51, agg6#52, agg7#53] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d52a088c8e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q18.native_iceberg_compat/simplified.txt @@ -0,0 +1,43 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2)))] + CometExchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + CometExpand [i_item_id,ca_country,ca_state,ca_county] [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] + CometProject [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_county,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk,cd_dep_count] #2 + CometProject [cd_demo_sk,cd_dep_count] + CometFilter [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + CometProject [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometBroadcastExchange [cd_demo_sk] #4 + CometFilter [cd_demo_sk] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state,ca_country] #5 + CometFilter [ca_address_sk,ca_county,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #7 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/explain.txt new file mode 100644 index 0000000000..eb29ad43d0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometTakeOrderedAndProject (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : : : +- CometBroadcastExchange (12) + : : : +- CometProject (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (15) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (20) + +- ReusedExchange (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1], [d_date_sk#1] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(5) CometFilter +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnotnull(ss_store_sk#6)) + +(6) CometBroadcastExchange +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(7) CometBroadcastHashJoin +Left output [1]: [d_date_sk#1] +Right output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [d_date_sk#1], [ss_sold_date_sk#8], Inner, BuildRight + +(8) CometProject +Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7], [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] + +(10) CometFilter +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 8)) AND isnotnull(i_item_sk#9)) + +(11) CometProject +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(12) CometBroadcastExchange +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Right output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [ss_item_sk#4], [i_item_sk#9], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(15) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [c_customer_sk#15, c_current_addr_sk#16] + +(16) CometFilter +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) + +(17) CometBroadcastExchange +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [c_customer_sk#15, c_current_addr_sk#16] + +(18) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Right output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [ss_customer_sk#5], [c_customer_sk#15], Inner, BuildRight + +(19) CometProject +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] +Arguments: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16], [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17, ca_zip#18] + +(21) CometFilter +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) + +(22) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17, ca_zip#18] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Right output [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [c_current_addr_sk#16], [ca_address_sk#17], Inner, BuildRight + +(24) CometProject +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] +Arguments: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18], [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] + +(25) ReusedExchange [Reuses operator id: 22] +Output [2]: [s_store_sk#19, s_zip#20] + +(26) CometBroadcastHashJoin +Left output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Right output [2]: [s_store_sk#19, s_zip#20] +Arguments: [ss_store_sk#6], [s_store_sk#19], Inner, NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)), BuildRight + +(27) CometProject +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] +Arguments: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(28) CometHashAggregate +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] + +(29) CometExchange +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#21] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(30) CometHashAggregate +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#21] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] + +(31) CometTakeOrderedAndProject +Input [5]: [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ext_price#24 DESC NULLS LAST,brand#23 ASC NULLS FIRST,brand_id#22 ASC NULLS FIRST,i_manufact_id#12 ASC NULLS FIRST,i_manufact#13 ASC NULLS FIRST], output=[brand_id#22,brand#23,i_manufact_id#12,i_manufact#13,ext_price#24]), [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24], 100, [ext_price#24 DESC NULLS LAST, brand#23 ASC NULLS FIRST, brand_id#22 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24] + +(32) ColumnarToRow [codegen id : 1] +Input [5]: [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/simplified.txt new file mode 100644 index 0000000000..92be42b721 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [brand_id,brand,i_manufact_id,i_manufact,ext_price] + CometHashAggregate [brand_id,brand,i_manufact_id,i_manufact,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + CometHashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip,s_store_sk,s_zip] + CometProject [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk,ca_address_sk,ca_zip] + CometProject [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_customer_sk,c_current_addr_sk] + CometProject [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] #3 + CometProject [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #4 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_zip] #5 + CometFilter [ca_address_sk,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_zip] + ReusedExchange [s_store_sk,s_zip] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..5ce5a2d1e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +* ColumnarToRow (34) ++- CometTakeOrderedAndProject (33) + +- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.store_sales (4) + : : : +- CometBroadcastExchange (12) + : : : +- CometProject (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.item (9) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometScan parquet spark_catalog.default.customer (15) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometScan parquet spark_catalog.default.customer_address (20) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometScan parquet spark_catalog.default.store (25) + + +(1) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1], [d_date_sk#1] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) CometFilter +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnotnull(ss_store_sk#6)) + +(6) CometBroadcastExchange +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(7) CometBroadcastHashJoin +Left output [1]: [d_date_sk#1] +Right output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [d_date_sk#1], [ss_sold_date_sk#8], Inner, BuildRight + +(8) CometProject +Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7], [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] + +(9) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 8)) AND isnotnull(i_item_sk#9)) + +(11) CometProject +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(12) CometBroadcastExchange +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Right output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [ss_item_sk#4], [i_item_sk#9], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(15) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) + +(17) CometBroadcastExchange +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [c_customer_sk#15, c_current_addr_sk#16] + +(18) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Right output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [ss_customer_sk#5], [c_customer_sk#15], Inner, BuildRight + +(19) CometProject +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] +Arguments: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16], [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] + +(20) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_zip#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(21) CometFilter +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) + +(22) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17, ca_zip#18] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Right output [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [c_current_addr_sk#16], [ca_address_sk#17], Inner, BuildRight + +(24) CometProject +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] +Arguments: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18], [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] + +(25) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#19, s_zip#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(26) CometFilter +Input [2]: [s_store_sk#19, s_zip#20] +Condition : (isnotnull(s_zip#20) AND isnotnull(s_store_sk#19)) + +(27) CometBroadcastExchange +Input [2]: [s_store_sk#19, s_zip#20] +Arguments: [s_store_sk#19, s_zip#20] + +(28) CometBroadcastHashJoin +Left output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Right output [2]: [s_store_sk#19, s_zip#20] +Arguments: [ss_store_sk#6], [s_store_sk#19], Inner, NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)), BuildRight + +(29) CometProject +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] +Arguments: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(30) CometHashAggregate +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] + +(31) CometExchange +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#21] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#21] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] + +(33) CometTakeOrderedAndProject +Input [5]: [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ext_price#24 DESC NULLS LAST,brand#23 ASC NULLS FIRST,brand_id#22 ASC NULLS FIRST,i_manufact_id#12 ASC NULLS FIRST,i_manufact#13 ASC NULLS FIRST], output=[brand_id#22,brand#23,i_manufact_id#12,i_manufact#13,ext_price#24]), [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24], 100, [ext_price#24 DESC NULLS LAST, brand#23 ASC NULLS FIRST, brand_id#22 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24] + +(34) ColumnarToRow [codegen id : 1] +Input [5]: [brand_id#22, brand#23, i_manufact_id#12, i_manufact#13, ext_price#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..46b0d650a5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q19.native_iceberg_compat/simplified.txt @@ -0,0 +1,36 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [brand_id,brand,i_manufact_id,i_manufact,ext_price] + CometHashAggregate [brand_id,brand,i_manufact_id,i_manufact,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + CometHashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip,s_store_sk,s_zip] + CometProject [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk,ca_address_sk,ca_zip] + CometProject [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_customer_sk,c_current_addr_sk] + CometProject [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] #3 + CometProject [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #4 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_zip] #5 + CometFilter [ca_address_sk,ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_zip] + CometBroadcastExchange [s_store_sk,s_zip] #6 + CometFilter [s_store_sk,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_zip] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/explain.txt new file mode 100644 index 0000000000..93dc1af246 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/explain.txt @@ -0,0 +1,173 @@ +== Physical Plan == +* ColumnarToRow (33) ++- CometSort (32) + +- CometColumnarExchange (31) + +- CometProject (30) + +- CometBroadcastHashJoin (29) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometHashAggregate (13) + : : +- CometExchange (12) + : : +- CometHashAggregate (11) + : : +- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometUnion (5) + : : : :- CometProject (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometProject (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (3) + : : +- CometBroadcastExchange (8) + : : +- CometFilter (7) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (6) + : +- CometBroadcastExchange (17) + : +- CometProject (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + +- CometBroadcastExchange (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometHashAggregate (21) + : +- ReusedExchange (20) + +- CometBroadcastExchange (25) + +- CometProject (24) + +- CometFilter (23) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (22) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Arguments: [ws_ext_sales_price#1, ws_sold_date_sk#2] + +(2) CometProject +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Arguments: [sold_date_sk#3, sales_price#4], [ws_sold_date_sk#2 AS sold_date_sk#3, ws_ext_sales_price#1 AS sales_price#4] + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Arguments: [cs_ext_sales_price#5, cs_sold_date_sk#6] + +(4) CometProject +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Arguments: [sold_date_sk#7, sales_price#8], [cs_sold_date_sk#6 AS sold_date_sk#7, cs_ext_sales_price#5 AS sales_price#8] + +(5) CometUnion +Child 0 Input [2]: [sold_date_sk#3, sales_price#4] +Child 1 Input [2]: [sold_date_sk#7, sales_price#8] + +(6) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(7) CometFilter +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(8) CometBroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(9) CometBroadcastHashJoin +Left output [2]: [sold_date_sk#3, sales_price#4] +Right output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [sold_date_sk#3], [d_date_sk#9], Inner, BuildRight + +(10) CometProject +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [sales_price#4, d_week_seq#10, d_day_name#11], [sales_price#4, d_week_seq#10, d_day_name#11] + +(11) CometHashAggregate +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] + +(12) CometExchange +Input [8]: [d_week_seq#10, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(13) CometHashAggregate +Input [8]: [d_week_seq#10, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_week_seq#19, d_year#20] +Arguments: [d_week_seq#19, d_year#20] + +(15) CometFilter +Input [2]: [d_week_seq#19, d_year#20] +Condition : ((isnotnull(d_year#20) AND (d_year#20 = 2001)) AND isnotnull(d_week_seq#19)) + +(16) CometProject +Input [2]: [d_week_seq#19, d_year#20] +Arguments: [d_week_seq#19], [d_week_seq#19] + +(17) CometBroadcastExchange +Input [1]: [d_week_seq#19] +Arguments: [d_week_seq#19] + +(18) CometBroadcastHashJoin +Left output [8]: [d_week_seq#10, sun_sales#21, mon_sales#22, tue_sales#23, wed_sales#24, thu_sales#25, fri_sales#26, sat_sales#27] +Right output [1]: [d_week_seq#19] +Arguments: [d_week_seq#10], [d_week_seq#19], Inner, BuildRight + +(19) CometProject +Input [9]: [d_week_seq#10, sun_sales#21, mon_sales#22, tue_sales#23, wed_sales#24, thu_sales#25, fri_sales#26, sat_sales#27, d_week_seq#19] +Arguments: [d_week_seq1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35], [d_week_seq#10 AS d_week_seq1#28, sun_sales#21 AS sun_sales1#29, mon_sales#22 AS mon_sales1#30, tue_sales#23 AS tue_sales1#31, wed_sales#24 AS wed_sales1#32, thu_sales#25 AS thu_sales1#33, fri_sales#26 AS fri_sales1#34, sat_sales#27 AS sat_sales1#35] + +(20) ReusedExchange [Reuses operator id: 12] +Output [8]: [d_week_seq#36, sum#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43] + +(21) CometHashAggregate +Input [8]: [d_week_seq#36, sum#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43] +Keys [1]: [d_week_seq#36] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#44 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Saturday ) THEN sales_price#4 END))] + +(22) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_week_seq#45, d_year#46] +Arguments: [d_week_seq#45, d_year#46] + +(23) CometFilter +Input [2]: [d_week_seq#45, d_year#46] +Condition : ((isnotnull(d_year#46) AND (d_year#46 = 2002)) AND isnotnull(d_week_seq#45)) + +(24) CometProject +Input [2]: [d_week_seq#45, d_year#46] +Arguments: [d_week_seq#45], [d_week_seq#45] + +(25) CometBroadcastExchange +Input [1]: [d_week_seq#45] +Arguments: [d_week_seq#45] + +(26) CometBroadcastHashJoin +Left output [8]: [d_week_seq#36, sun_sales#47, mon_sales#48, tue_sales#49, wed_sales#50, thu_sales#51, fri_sales#52, sat_sales#53] +Right output [1]: [d_week_seq#45] +Arguments: [d_week_seq#36], [d_week_seq#45], Inner, BuildRight + +(27) CometProject +Input [9]: [d_week_seq#36, sun_sales#47, mon_sales#48, tue_sales#49, wed_sales#50, thu_sales#51, fri_sales#52, sat_sales#53, d_week_seq#45] +Arguments: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61], [d_week_seq#36 AS d_week_seq2#54, sun_sales#47 AS sun_sales2#55, mon_sales#48 AS mon_sales2#56, tue_sales#49 AS tue_sales2#57, wed_sales#50 AS wed_sales2#58, thu_sales#51 AS thu_sales2#59, fri_sales#52 AS fri_sales2#60, sat_sales#53 AS sat_sales2#61] + +(28) CometBroadcastExchange +Input [8]: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] +Arguments: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] + +(29) CometBroadcastHashJoin +Left output [8]: [d_week_seq1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35] +Right output [8]: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] +Arguments: [d_week_seq1#28], [(d_week_seq2#54 - 53)], Inner, BuildRight + +(30) CometProject +Input [16]: [d_week_seq1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35, d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] +Arguments: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68], [d_week_seq1#28, round((sun_sales1#29 / sun_sales2#55), 2) AS round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1#30 / mon_sales2#56), 2) AS round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1#31 / tue_sales2#57), 2) AS round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1#32 / wed_sales2#58), 2) AS round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1#33 / thu_sales2#59), 2) AS round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1#34 / fri_sales2#60), 2) AS round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1#35 / sat_sales2#61), 2) AS round((sat_sales1 / sat_sales2), 2)#68] + +(31) CometColumnarExchange +Input [8]: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68] +Arguments: rangepartitioning(d_week_seq1#28 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(32) CometSort +Input [8]: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68] +Arguments: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68], [d_week_seq1#28 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 1] +Input [8]: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8550678605 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_datafusion/simplified.txt @@ -0,0 +1,35 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [d_week_seq1,round((sun_sales1 / sun_sales2), 2),round((mon_sales1 / mon_sales2), 2),round((tue_sales1 / tue_sales2), 2),round((wed_sales1 / wed_sales2), 2),round((thu_sales1 / thu_sales2), 2),round((fri_sales1 / fri_sales2), 2),round((sat_sales1 / sat_sales2), 2)] + CometColumnarExchange [d_week_seq1] #1 + CometProject [sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] [d_week_seq1,round((sun_sales1 / sun_sales2), 2),round((mon_sales1 / mon_sales2), 2),round((tue_sales1 / tue_sales2), 2),round((wed_sales1 / wed_sales2), 2),round((thu_sales1 / thu_sales2), 2),round((fri_sales1 / fri_sales2), 2),round((sat_sales1 / sat_sales2), 2)] + CometBroadcastHashJoin [d_week_seq1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1,d_week_seq2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [d_week_seq1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,d_week_seq] + CometHashAggregate [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END))] + CometExchange [d_week_seq] #2 + CometHashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,d_day_name,sales_price] + CometProject [sales_price,d_week_seq,d_day_name] + CometBroadcastHashJoin [sold_date_sk,sales_price,d_date_sk,d_week_seq,d_day_name] + CometUnion [sold_date_sk,sales_price] + CometProject [ws_sold_date_sk,ws_ext_sales_price] [sold_date_sk,sales_price] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ext_sales_price,ws_sold_date_sk] + CometProject [cs_sold_date_sk,cs_ext_sales_price] [sold_date_sk,sales_price] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_week_seq,d_day_name] #3 + CometFilter [d_date_sk,d_week_seq,d_day_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq,d_day_name] + CometBroadcastExchange [d_week_seq] #4 + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year] + CometBroadcastExchange [d_week_seq2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] #5 + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [d_week_seq2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,d_week_seq] + CometHashAggregate [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END))] + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + CometBroadcastExchange [d_week_seq] #6 + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d51ae75262 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/explain.txt @@ -0,0 +1,188 @@ +== Physical Plan == +* ColumnarToRow (33) ++- CometSort (32) + +- CometColumnarExchange (31) + +- CometProject (30) + +- CometBroadcastHashJoin (29) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometHashAggregate (13) + : : +- CometExchange (12) + : : +- CometHashAggregate (11) + : : +- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometUnion (5) + : : : :- CometProject (2) + : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : +- CometProject (4) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (3) + : : +- CometBroadcastExchange (8) + : : +- CometFilter (7) + : : +- CometScan parquet spark_catalog.default.date_dim (6) + : +- CometBroadcastExchange (17) + : +- CometProject (16) + : +- CometFilter (15) + : +- CometScan parquet spark_catalog.default.date_dim (14) + +- CometBroadcastExchange (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometHashAggregate (21) + : +- ReusedExchange (20) + +- CometBroadcastExchange (25) + +- CometProject (24) + +- CometFilter (23) + +- CometScan parquet spark_catalog.default.date_dim (22) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#2)] +ReadSchema: struct + +(2) CometProject +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Arguments: [sold_date_sk#3, sales_price#4], [ws_sold_date_sk#2 AS sold_date_sk#3, ws_ext_sales_price#1 AS sales_price#4] + +(3) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#6)] +ReadSchema: struct + +(4) CometProject +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Arguments: [sold_date_sk#7, sales_price#8], [cs_sold_date_sk#6 AS sold_date_sk#7, cs_ext_sales_price#5 AS sales_price#8] + +(5) CometUnion +Child 0 Input [2]: [sold_date_sk#3, sales_price#4] +Child 1 Input [2]: [sold_date_sk#7, sales_price#8] + +(6) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(7) CometFilter +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(8) CometBroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(9) CometBroadcastHashJoin +Left output [2]: [sold_date_sk#3, sales_price#4] +Right output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [sold_date_sk#3], [d_date_sk#9], Inner, BuildRight + +(10) CometProject +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [sales_price#4, d_week_seq#10, d_day_name#11], [sales_price#4, d_week_seq#10, d_day_name#11] + +(11) CometHashAggregate +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] + +(12) CometExchange +Input [8]: [d_week_seq#10, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(13) CometHashAggregate +Input [8]: [d_week_seq#10, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] + +(14) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_week_seq#19, d_year#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [d_week_seq#19, d_year#20] +Condition : ((isnotnull(d_year#20) AND (d_year#20 = 2001)) AND isnotnull(d_week_seq#19)) + +(16) CometProject +Input [2]: [d_week_seq#19, d_year#20] +Arguments: [d_week_seq#19], [d_week_seq#19] + +(17) CometBroadcastExchange +Input [1]: [d_week_seq#19] +Arguments: [d_week_seq#19] + +(18) CometBroadcastHashJoin +Left output [8]: [d_week_seq#10, sun_sales#21, mon_sales#22, tue_sales#23, wed_sales#24, thu_sales#25, fri_sales#26, sat_sales#27] +Right output [1]: [d_week_seq#19] +Arguments: [d_week_seq#10], [d_week_seq#19], Inner, BuildRight + +(19) CometProject +Input [9]: [d_week_seq#10, sun_sales#21, mon_sales#22, tue_sales#23, wed_sales#24, thu_sales#25, fri_sales#26, sat_sales#27, d_week_seq#19] +Arguments: [d_week_seq1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35], [d_week_seq#10 AS d_week_seq1#28, sun_sales#21 AS sun_sales1#29, mon_sales#22 AS mon_sales1#30, tue_sales#23 AS tue_sales1#31, wed_sales#24 AS wed_sales1#32, thu_sales#25 AS thu_sales1#33, fri_sales#26 AS fri_sales1#34, sat_sales#27 AS sat_sales1#35] + +(20) ReusedExchange [Reuses operator id: 12] +Output [8]: [d_week_seq#36, sum#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43] + +(21) CometHashAggregate +Input [8]: [d_week_seq#36, sum#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43] +Keys [1]: [d_week_seq#36] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#44 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#44 = Saturday ) THEN sales_price#4 END))] + +(22) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_week_seq#45, d_year#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] +ReadSchema: struct + +(23) CometFilter +Input [2]: [d_week_seq#45, d_year#46] +Condition : ((isnotnull(d_year#46) AND (d_year#46 = 2002)) AND isnotnull(d_week_seq#45)) + +(24) CometProject +Input [2]: [d_week_seq#45, d_year#46] +Arguments: [d_week_seq#45], [d_week_seq#45] + +(25) CometBroadcastExchange +Input [1]: [d_week_seq#45] +Arguments: [d_week_seq#45] + +(26) CometBroadcastHashJoin +Left output [8]: [d_week_seq#36, sun_sales#47, mon_sales#48, tue_sales#49, wed_sales#50, thu_sales#51, fri_sales#52, sat_sales#53] +Right output [1]: [d_week_seq#45] +Arguments: [d_week_seq#36], [d_week_seq#45], Inner, BuildRight + +(27) CometProject +Input [9]: [d_week_seq#36, sun_sales#47, mon_sales#48, tue_sales#49, wed_sales#50, thu_sales#51, fri_sales#52, sat_sales#53, d_week_seq#45] +Arguments: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61], [d_week_seq#36 AS d_week_seq2#54, sun_sales#47 AS sun_sales2#55, mon_sales#48 AS mon_sales2#56, tue_sales#49 AS tue_sales2#57, wed_sales#50 AS wed_sales2#58, thu_sales#51 AS thu_sales2#59, fri_sales#52 AS fri_sales2#60, sat_sales#53 AS sat_sales2#61] + +(28) CometBroadcastExchange +Input [8]: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] +Arguments: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] + +(29) CometBroadcastHashJoin +Left output [8]: [d_week_seq1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35] +Right output [8]: [d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] +Arguments: [d_week_seq1#28], [(d_week_seq2#54 - 53)], Inner, BuildRight + +(30) CometProject +Input [16]: [d_week_seq1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35, d_week_seq2#54, sun_sales2#55, mon_sales2#56, tue_sales2#57, wed_sales2#58, thu_sales2#59, fri_sales2#60, sat_sales2#61] +Arguments: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68], [d_week_seq1#28, round((sun_sales1#29 / sun_sales2#55), 2) AS round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1#30 / mon_sales2#56), 2) AS round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1#31 / tue_sales2#57), 2) AS round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1#32 / wed_sales2#58), 2) AS round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1#33 / thu_sales2#59), 2) AS round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1#34 / fri_sales2#60), 2) AS round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1#35 / sat_sales2#61), 2) AS round((sat_sales1 / sat_sales2), 2)#68] + +(31) CometColumnarExchange +Input [8]: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68] +Arguments: rangepartitioning(d_week_seq1#28 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(32) CometSort +Input [8]: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68] +Arguments: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68], [d_week_seq1#28 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 1] +Input [8]: [d_week_seq1#28, round((sun_sales1 / sun_sales2), 2)#62, round((mon_sales1 / mon_sales2), 2)#63, round((tue_sales1 / tue_sales2), 2)#64, round((wed_sales1 / wed_sales2), 2)#65, round((thu_sales1 / thu_sales2), 2)#66, round((fri_sales1 / fri_sales2), 2)#67, round((sat_sales1 / sat_sales2), 2)#68] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ed772bade0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q2.native_iceberg_compat/simplified.txt @@ -0,0 +1,35 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [d_week_seq1,round((sun_sales1 / sun_sales2), 2),round((mon_sales1 / mon_sales2), 2),round((tue_sales1 / tue_sales2), 2),round((wed_sales1 / wed_sales2), 2),round((thu_sales1 / thu_sales2), 2),round((fri_sales1 / fri_sales2), 2),round((sat_sales1 / sat_sales2), 2)] + CometColumnarExchange [d_week_seq1] #1 + CometProject [sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] [d_week_seq1,round((sun_sales1 / sun_sales2), 2),round((mon_sales1 / mon_sales2), 2),round((tue_sales1 / tue_sales2), 2),round((wed_sales1 / wed_sales2), 2),round((thu_sales1 / thu_sales2), 2),round((fri_sales1 / fri_sales2), 2),round((sat_sales1 / sat_sales2), 2)] + CometBroadcastHashJoin [d_week_seq1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1,d_week_seq2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [d_week_seq1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,d_week_seq] + CometHashAggregate [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END))] + CometExchange [d_week_seq] #2 + CometHashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,d_day_name,sales_price] + CometProject [sales_price,d_week_seq,d_day_name] + CometBroadcastHashJoin [sold_date_sk,sales_price,d_date_sk,d_week_seq,d_day_name] + CometUnion [sold_date_sk,sales_price] + CometProject [ws_sold_date_sk,ws_ext_sales_price] [sold_date_sk,sales_price] + CometScan parquet spark_catalog.default.web_sales [ws_ext_sales_price,ws_sold_date_sk] + CometProject [cs_sold_date_sk,cs_ext_sales_price] [sold_date_sk,sales_price] + CometScan parquet spark_catalog.default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_week_seq,d_day_name] #3 + CometFilter [d_date_sk,d_week_seq,d_day_name] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq,d_day_name] + CometBroadcastExchange [d_week_seq] #4 + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year] + CometScan parquet spark_catalog.default.date_dim [d_week_seq,d_year] + CometBroadcastExchange [d_week_seq2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] #5 + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [d_week_seq2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,d_week_seq] + CometHashAggregate [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END))] + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + CometBroadcastExchange [d_week_seq] #6 + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year] + CometScan parquet spark_catalog.default.date_dim [d_week_seq,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/explain.txt new file mode 100644 index 0000000000..01031df826 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/explain.txt @@ -0,0 +1,116 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Arguments: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [cs_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/simplified.txt new file mode 100644 index 0000000000..515af15c2b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id] + CometExchange [i_class] #1 + CometHashAggregate [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..c223b71290 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.item (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometScan parquet spark_catalog.default.date_dim (8) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [cs_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a2a547e345 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q20.native_iceberg_compat/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id] + CometExchange [i_class] #1 + CometHashAggregate [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/explain.txt new file mode 100644 index 0000000000..4a2b1204d9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/explain.txt @@ -0,0 +1,127 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometTakeOrderedAndProject (23) + +- CometFilter (22) + +- CometHashAggregate (21) + +- CometExchange (20) + +- CometHashAggregate (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : +- CometBroadcastExchange (11) + : +- CometProject (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (8) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_warehouse_sk#2) AND isnotnull(inv_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [w_warehouse_sk#5, w_warehouse_name#6] + +(4) CometFilter +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#5], Inner, BuildRight + +(7) CometProject +Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6], [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] + +(8) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Arguments: [i_item_sk#7, i_item_id#8, i_current_price#9] + +(9) CometFilter +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Condition : (((isnotnull(i_current_price#9) AND (i_current_price#9 >= 0.99)) AND (i_current_price#9 <= 1.49)) AND isnotnull(i_item_sk#7)) + +(10) CometProject +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Arguments: [i_item_sk#7, i_item_id#8], [i_item_sk#7, i_item_id#8] + +(11) CometBroadcastExchange +Input [2]: [i_item_sk#7, i_item_id#8] +Arguments: [i_item_sk#7, i_item_id#8] + +(12) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] +Right output [2]: [i_item_sk#7, i_item_id#8] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_sk#7, i_item_id#8] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8], [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(15) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 2000-02-10)) AND (d_date#11 <= 2000-04-10)) AND isnotnull(d_date_sk#10)) + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(17) CometBroadcastHashJoin +Left output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] +Right output [2]: [d_date_sk#10, d_date#11] +Arguments: [inv_date_sk#4], [d_date_sk#10], Inner, BuildRight + +(18) CometProject +Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8, d_date_sk#10, d_date#11] +Arguments: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11], [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] + +(19) CometHashAggregate +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [partial_sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] + +(20) CometExchange +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#12, sum#13] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#12, sum#13] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] + +(22) CometFilter +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] +Condition : (CASE WHEN (inv_before#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(inv_after#15 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(inv_before#14 as double)))))) >= 0.666667) END AND CASE WHEN (inv_before#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(inv_after#15 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(inv_before#14 as double)))))) <= 1.5) END) + +(23) CometTakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#6 ASC NULLS FIRST,i_item_id#8 ASC NULLS FIRST], output=[w_warehouse_name#6,i_item_id#8,inv_before#14,inv_after#15]), [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15], 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] + +(24) ColumnarToRow [codegen id : 1] +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/simplified.txt new file mode 100644 index 0000000000..25f0c2270e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + CometFilter [w_warehouse_name,i_item_id,inv_before,inv_after] + CometHashAggregate [w_warehouse_name,i_item_id,inv_before,inv_after,sum,sum,sum(CASE WHEN (d_date < 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END)] + CometExchange [w_warehouse_name,i_item_id] #1 + CometHashAggregate [w_warehouse_name,i_item_id,sum,sum,d_date,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id,d_date_sk,d_date] + CometProject [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_sk,i_item_id] + CometProject [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [i_item_sk,i_item_id] #3 + CometProject [i_item_sk,i_item_id] + CometFilter [i_item_sk,i_item_id,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_current_price] + CometBroadcastExchange [d_date_sk,d_date] #4 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..46b4846919 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometTakeOrderedAndProject (23) + +- CometFilter (22) + +- CometHashAggregate (21) + +- CometExchange (20) + +- CometHashAggregate (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.inventory (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.warehouse (3) + : +- CometBroadcastExchange (11) + : +- CometProject (10) + : +- CometFilter (9) + : +- CometScan parquet spark_catalog.default.item (8) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometScan parquet spark_catalog.default.date_dim (14) + + +(1) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_warehouse_sk#2) AND isnotnull(inv_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#5], Inner, BuildRight + +(7) CometProject +Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6], [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] + +(8) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Condition : (((isnotnull(i_current_price#9) AND (i_current_price#9 >= 0.99)) AND (i_current_price#9 <= 1.49)) AND isnotnull(i_item_sk#7)) + +(10) CometProject +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Arguments: [i_item_sk#7, i_item_id#8], [i_item_sk#7, i_item_id#8] + +(11) CometBroadcastExchange +Input [2]: [i_item_sk#7, i_item_id#8] +Arguments: [i_item_sk#7, i_item_id#8] + +(12) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] +Right output [2]: [i_item_sk#7, i_item_id#8] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_sk#7, i_item_id#8] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8], [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] + +(14) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 2000-02-10)) AND (d_date#11 <= 2000-04-10)) AND isnotnull(d_date_sk#10)) + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(17) CometBroadcastHashJoin +Left output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] +Right output [2]: [d_date_sk#10, d_date#11] +Arguments: [inv_date_sk#4], [d_date_sk#10], Inner, BuildRight + +(18) CometProject +Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8, d_date_sk#10, d_date#11] +Arguments: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11], [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] + +(19) CometHashAggregate +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [partial_sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] + +(20) CometExchange +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#12, sum#13] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#12, sum#13] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] + +(22) CometFilter +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] +Condition : (CASE WHEN (inv_before#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(inv_after#15 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(inv_before#14 as double)))))) >= 0.666667) END AND CASE WHEN (inv_before#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(inv_after#15 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(inv_before#14 as double)))))) <= 1.5) END) + +(23) CometTakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#6 ASC NULLS FIRST,i_item_id#8 ASC NULLS FIRST], output=[w_warehouse_name#6,i_item_id#8,inv_before#14,inv_after#15]), [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15], 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] + +(24) ColumnarToRow [codegen id : 1] +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#14, inv_after#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bf9f325fcd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q21.native_iceberg_compat/simplified.txt @@ -0,0 +1,26 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + CometFilter [w_warehouse_name,i_item_id,inv_before,inv_after] + CometHashAggregate [w_warehouse_name,i_item_id,inv_before,inv_after,sum,sum,sum(CASE WHEN (d_date < 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END)] + CometExchange [w_warehouse_name,i_item_id] #1 + CometHashAggregate [w_warehouse_name,i_item_id,sum,sum,d_date,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id,d_date_sk,d_date] + CometProject [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_sk,i_item_id] + CometProject [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [i_item_sk,i_item_id] #3 + CometProject [i_item_sk,i_item_id] + CometFilter [i_item_sk,i_item_id,i_current_price] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_current_price] + CometBroadcastExchange [d_date_sk,d_date] #4 + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/explain.txt new file mode 100644 index 0000000000..4a89a80d9c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/explain.txt @@ -0,0 +1,127 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometTakeOrderedAndProject (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometExpand (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5, d_month_seq#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [inv_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3], [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(10) CometFilter +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(12) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Right output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(15) CometFilter +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(16) CometBroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(17) CometBroadcastHashJoin +Left output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Right output [1]: [w_warehouse_sk#12] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#12], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] +Arguments: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] + +(19) CometExpand +Input [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] + +(20) CometHashAggregate +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] + +(21) CometExchange +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#18, count#19] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [avg(inv_quantity_on_hand#3)] + +(23) CometTakeOrderedAndProject +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[qoh#20 ASC NULLS FIRST,i_product_name#13 ASC NULLS FIRST,i_brand#14 ASC NULLS FIRST,i_class#15 ASC NULLS FIRST,i_category#16 ASC NULLS FIRST], output=[i_product_name#13,i_brand#14,i_class#15,i_category#16,qoh#20]), [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20], 100, [qoh#20 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20] + +(24) ColumnarToRow [codegen id : 1] +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5e6610f3bc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name,i_brand,i_class,i_category,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,spark_grouping_id,sum,count,avg(inv_quantity_on_hand)] + CometExchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count,inv_quantity_on_hand] + CometExpand [i_product_name,i_brand,i_class,i_category] [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category,spark_grouping_id] + CometProject [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name,w_warehouse_sk] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometBroadcastExchange [w_warehouse_sk] #4 + CometFilter [w_warehouse_sk] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d56a8ca3b2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometTakeOrderedAndProject (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometExpand (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.inventory (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.item (9) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometScan parquet spark_catalog.default.warehouse (14) + + +(1) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [inv_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3], [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] + +(9) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(12) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Right output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(14) CometScan parquet spark_catalog.default.warehouse +Output [1]: [w_warehouse_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(15) CometFilter +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(16) CometBroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(17) CometBroadcastHashJoin +Left output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Right output [1]: [w_warehouse_sk#12] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#12], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] +Arguments: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] + +(19) CometExpand +Input [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] + +(20) CometHashAggregate +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] + +(21) CometExchange +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#18, count#19] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [avg(inv_quantity_on_hand#3)] + +(23) CometTakeOrderedAndProject +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[qoh#20 ASC NULLS FIRST,i_product_name#13 ASC NULLS FIRST,i_brand#14 ASC NULLS FIRST,i_class#15 ASC NULLS FIRST,i_category#16 ASC NULLS FIRST], output=[i_product_name#13,i_brand#14,i_class#15,i_category#16,qoh#20]), [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20], 100, [qoh#20 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20] + +(24) ColumnarToRow [codegen id : 1] +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a2ba9f00e1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q22.native_iceberg_compat/simplified.txt @@ -0,0 +1,26 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name,i_brand,i_class,i_category,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,spark_grouping_id,sum,count,avg(inv_quantity_on_hand)] + CometExchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count,inv_quantity_on_hand] + CometExpand [i_product_name,i_brand,i_class,i_category] [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category,spark_grouping_id] + CometProject [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name,w_warehouse_sk] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometBroadcastExchange [w_warehouse_sk] #4 + CometFilter [w_warehouse_sk] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/explain.txt new file mode 100644 index 0000000000..a7df911fce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/explain.txt @@ -0,0 +1,452 @@ +== Physical Plan == +* ColumnarToRow (67) ++- CometHashAggregate (66) + +- CometExchange (65) + +- CometHashAggregate (64) + +- CometUnion (63) + :- CometProject (46) + : +- CometBroadcastHashJoin (45) + : :- CometProject (40) + : : +- CometSortMergeJoin (39) + : : :- CometSort (24) + : : : +- CometExchange (23) + : : : +- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometBroadcastExchange (20) + : : : +- CometProject (19) + : : : +- CometFilter (18) + : : : +- CometHashAggregate (17) + : : : +- CometExchange (16) + : : : +- CometHashAggregate (15) + : : : +- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometFilter (3) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (2) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- CometBroadcastExchange (12) + : : : +- CometFilter (11) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (10) + : : +- CometSort (38) + : : +- CometProject (37) + : : +- CometFilter (36) + : : +- CometHashAggregate (35) + : : +- CometExchange (34) + : : +- CometHashAggregate (33) + : : +- CometProject (32) + : : +- CometBroadcastHashJoin (31) + : : :- CometProject (27) + : : : +- CometFilter (26) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (25) + : : +- CometBroadcastExchange (30) + : : +- CometFilter (29) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (28) + : +- CometBroadcastExchange (44) + : +- CometProject (43) + : +- CometFilter (42) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (41) + +- CometProject (62) + +- CometBroadcastHashJoin (61) + :- CometProject (59) + : +- CometSortMergeJoin (58) + : :- CometSort (52) + : : +- CometExchange (51) + : : +- CometProject (50) + : : +- CometBroadcastHashJoin (49) + : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (47) + : : +- ReusedExchange (48) + : +- CometSort (57) + : +- CometProject (56) + : +- CometFilter (55) + : +- CometHashAggregate (54) + : +- ReusedExchange (53) + +- ReusedExchange (60) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(2) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#6, ss_sold_date_sk#7] + +(3) CometFilter +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9, d_year#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9], [d_date_sk#8, d_date#9] + +(7) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: [d_date_sk#8, d_date#9] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_date#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] +Arguments: [ss_item_sk#6, d_date#9], [ss_item_sk#6, d_date#9] + +(10) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(11) CometFilter +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(12) CometBroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(13) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, d_date#9] +Right output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [ss_item_sk#6], [i_item_sk#11], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] +Arguments: [d_date#9, i_item_sk#11, _groupingexpression#13], [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] + +(15) CometHashAggregate +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] + +(16) CometExchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] + +(18) CometFilter +Input [2]: [item_sk#15, cnt#16] +Condition : (cnt#16 > 4) + +(19) CometProject +Input [2]: [item_sk#15, cnt#16] +Arguments: [item_sk#15], [item_sk#15] + +(20) CometBroadcastExchange +Input [1]: [item_sk#15] +Arguments: [item_sk#15] + +(21) CometBroadcastHashJoin +Left output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [item_sk#15] +Arguments: [cs_item_sk#2], [item_sk#15], LeftSemi, BuildRight + +(22) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(23) CometExchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1 ASC NULLS FIRST] + +(25) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] + +(26) CometFilter +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Condition : isnotnull(ss_customer_sk#17) + +(27) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19], [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] + +(28) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(29) CometFilter +Input [1]: [c_customer_sk#21] +Condition : isnotnull(c_customer_sk#21) + +(30) CometBroadcastExchange +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(31) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] +Right output [1]: [c_customer_sk#21] +Arguments: [ss_customer_sk#17], [c_customer_sk#21], Inner, BuildRight + +(32) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Arguments: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21], [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(33) CometHashAggregate +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(34) CometExchange +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(35) CometHashAggregate +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(36) CometFilter +Input [2]: [c_customer_sk#21, ssales#24] +Condition : (isnotnull(ssales#24) AND (cast(ssales#24 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#25, [id=#26]))) + +(37) CometProject +Input [2]: [c_customer_sk#21, ssales#24] +Arguments: [c_customer_sk#21], [c_customer_sk#21] + +(38) CometSort +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21], [c_customer_sk#21 ASC NULLS FIRST] + +(39) CometSortMergeJoin +Left output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [c_customer_sk#21] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#21], LeftSemi + +(40) CometProject +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(41) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#27, d_year#28, d_moy#29] +Arguments: [d_date_sk#27, d_year#28, d_moy#29] + +(42) CometFilter +Input [3]: [d_date_sk#27, d_year#28, d_moy#29] +Condition : ((((isnotnull(d_year#28) AND isnotnull(d_moy#29)) AND (d_year#28 = 2000)) AND (d_moy#29 = 2)) AND isnotnull(d_date_sk#27)) + +(43) CometProject +Input [3]: [d_date_sk#27, d_year#28, d_moy#29] +Arguments: [d_date_sk#27], [d_date_sk#27] + +(44) CometBroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: [d_date_sk#27] + +(45) CometBroadcastHashJoin +Left output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [d_date_sk#27] +Arguments: [cs_sold_date_sk#5], [d_date_sk#27], Inner, BuildRight + +(46) CometProject +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#27] +Arguments: [sales#30], [(cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4) AS sales#30] + +(47) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [5]: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] + +(48) ReusedExchange [Reuses operator id: 20] +Output [1]: [item_sk#36] + +(49) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Right output [1]: [item_sk#36] +Arguments: [ws_item_sk#31], [item_sk#36], LeftSemi, BuildRight + +(50) CometProject +Input [5]: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35], [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] + +(51) CometExchange +Input [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(52) CometSort +Input [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35], [ws_bill_customer_sk#32 ASC NULLS FIRST] + +(53) ReusedExchange [Reuses operator id: 34] +Output [3]: [c_customer_sk#37, sum#38, isEmpty#39] + +(54) CometHashAggregate +Input [3]: [c_customer_sk#37, sum#38, isEmpty#39] +Keys [1]: [c_customer_sk#37] +Functions [1]: [sum((cast(ss_quantity#40 as decimal(10,0)) * ss_sales_price#41))] + +(55) CometFilter +Input [2]: [c_customer_sk#37, ssales#42] +Condition : (isnotnull(ssales#42) AND (cast(ssales#42 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#25, [id=#26]))) + +(56) CometProject +Input [2]: [c_customer_sk#37, ssales#42] +Arguments: [c_customer_sk#37], [c_customer_sk#37] + +(57) CometSort +Input [1]: [c_customer_sk#37] +Arguments: [c_customer_sk#37], [c_customer_sk#37 ASC NULLS FIRST] + +(58) CometSortMergeJoin +Left output [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Right output [1]: [c_customer_sk#37] +Arguments: [ws_bill_customer_sk#32], [c_customer_sk#37], LeftSemi + +(59) CometProject +Input [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35], [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] + +(60) ReusedExchange [Reuses operator id: 44] +Output [1]: [d_date_sk#43] + +(61) CometBroadcastHashJoin +Left output [3]: [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Right output [1]: [d_date_sk#43] +Arguments: [ws_sold_date_sk#35], [d_date_sk#43], Inner, BuildRight + +(62) CometProject +Input [4]: [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35, d_date_sk#43] +Arguments: [sales#44], [(cast(ws_quantity#33 as decimal(10,0)) * ws_list_price#34) AS sales#44] + +(63) CometUnion +Child 0 Input [1]: [sales#30] +Child 1 Input [1]: [sales#44] + +(64) CometHashAggregate +Input [1]: [sales#30] +Keys: [] +Functions [1]: [partial_sum(sales#30)] + +(65) CometExchange +Input [2]: [sum#45, isEmpty#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(66) CometHashAggregate +Input [2]: [sum#45, isEmpty#46] +Keys: [] +Functions [1]: [sum(sales#30)] + +(67) ColumnarToRow [codegen id : 1] +Input [1]: [sum(sales)#47] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 36 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* ColumnarToRow (85) ++- CometHashAggregate (84) + +- CometExchange (83) + +- CometHashAggregate (82) + +- CometHashAggregate (81) + +- CometExchange (80) + +- CometHashAggregate (79) + +- CometProject (78) + +- CometBroadcastHashJoin (77) + :- CometProject (72) + : +- CometBroadcastHashJoin (71) + : :- CometFilter (69) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (68) + : +- ReusedExchange (70) + +- CometBroadcastExchange (76) + +- CometProject (75) + +- CometFilter (74) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (73) + + +(68) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] +Arguments: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] + +(69) CometFilter +Input [4]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] +Condition : isnotnull(ss_customer_sk#48) + +(70) ReusedExchange [Reuses operator id: 30] +Output [1]: [c_customer_sk#52] + +(71) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] +Right output [1]: [c_customer_sk#52] +Arguments: [ss_customer_sk#48], [c_customer_sk#52], Inner, BuildRight + +(72) CometProject +Input [5]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52] +Arguments: [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52], [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52] + +(73) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#53, d_year#54] +Arguments: [d_date_sk#53, d_year#54] + +(74) CometFilter +Input [2]: [d_date_sk#53, d_year#54] +Condition : (d_year#54 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#53)) + +(75) CometProject +Input [2]: [d_date_sk#53, d_year#54] +Arguments: [d_date_sk#53], [d_date_sk#53] + +(76) CometBroadcastExchange +Input [1]: [d_date_sk#53] +Arguments: [d_date_sk#53] + +(77) CometBroadcastHashJoin +Left output [4]: [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52] +Right output [1]: [d_date_sk#53] +Arguments: [ss_sold_date_sk#51], [d_date_sk#53], Inner, BuildRight + +(78) CometProject +Input [5]: [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52, d_date_sk#53] +Arguments: [ss_quantity#49, ss_sales_price#50, c_customer_sk#52], [ss_quantity#49, ss_sales_price#50, c_customer_sk#52] + +(79) CometHashAggregate +Input [3]: [ss_quantity#49, ss_sales_price#50, c_customer_sk#52] +Keys [1]: [c_customer_sk#52] +Functions [1]: [partial_sum((cast(ss_quantity#49 as decimal(10,0)) * ss_sales_price#50))] + +(80) CometExchange +Input [3]: [c_customer_sk#52, sum#55, isEmpty#56] +Arguments: hashpartitioning(c_customer_sk#52, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(81) CometHashAggregate +Input [3]: [c_customer_sk#52, sum#55, isEmpty#56] +Keys [1]: [c_customer_sk#52] +Functions [1]: [sum((cast(ss_quantity#49 as decimal(10,0)) * ss_sales_price#50))] + +(82) CometHashAggregate +Input [1]: [csales#57] +Keys: [] +Functions [1]: [partial_max(csales#57)] + +(83) CometExchange +Input [1]: [max#58] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(84) CometHashAggregate +Input [1]: [max#58] +Keys: [] +Functions [1]: [max(csales#57)] + +(85) ColumnarToRow [codegen id : 1] +Input [1]: [tpcds_cmax#59] + +Subquery:2 Hosting operator id = 55 Hosting Expression = ReusedSubquery Subquery scalar-subquery#25, [id=#26] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5159f36c51 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_datafusion/simplified.txt @@ -0,0 +1,91 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [sum(sales),sum,isEmpty,sum(sales)] + CometExchange #1 + CometHashAggregate [sum,isEmpty,sales] + CometUnion [sales] + CometProject [cs_quantity,cs_list_price] [sales] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometProject [cs_quantity,cs_list_price,cs_sold_date_sk] + CometSortMergeJoin [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk,c_customer_sk] + CometSort [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometExchange [cs_bill_customer_sk] #2 + CometProject [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,item_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [item_sk] #3 + CometProject [item_sk] + CometFilter [item_sk,cnt] + CometHashAggregate [item_sk,cnt,_groupingexpression,i_item_sk,d_date,count,count(1)] + CometExchange [_groupingexpression,i_item_sk,d_date] #4 + CometHashAggregate [_groupingexpression,i_item_sk,d_date,count] + CometProject [i_item_desc] [d_date,i_item_sk,_groupingexpression] + CometBroadcastHashJoin [ss_item_sk,d_date,i_item_sk,i_item_desc] + CometProject [ss_item_sk,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_year] + CometBroadcastExchange [i_item_sk,i_item_desc] #6 + CometFilter [i_item_sk,i_item_desc] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_desc] + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [tpcds_cmax,max,max(csales)] + CometExchange #9 + CometHashAggregate [max,csales] + CometHashAggregate [csales,c_customer_sk,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #10 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk,d_date_sk] + CometProject [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + ReusedExchange [c_customer_sk] #8 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #7 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,c_customer_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [c_customer_sk] #8 + CometFilter [c_customer_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk] + CometBroadcastExchange [d_date_sk] #12 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometProject [ws_quantity,ws_list_price] [sales] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometProject [ws_quantity,ws_list_price,ws_sold_date_sk] + CometSortMergeJoin [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,c_customer_sk] + CometSort [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometExchange [ws_bill_customer_sk] #13 + CometProject [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,item_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [item_sk] #3 + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + ReusedSubquery [tpcds_cmax] #1 + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d402d87218 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/explain.txt @@ -0,0 +1,484 @@ +== Physical Plan == +* ColumnarToRow (67) ++- CometHashAggregate (66) + +- CometExchange (65) + +- CometHashAggregate (64) + +- CometUnion (63) + :- CometProject (46) + : +- CometBroadcastHashJoin (45) + : :- CometProject (40) + : : +- CometSortMergeJoin (39) + : : :- CometSort (24) + : : : +- CometExchange (23) + : : : +- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : +- CometBroadcastExchange (20) + : : : +- CometProject (19) + : : : +- CometFilter (18) + : : : +- CometHashAggregate (17) + : : : +- CometExchange (16) + : : : +- CometHashAggregate (15) + : : : +- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometFilter (3) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (2) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : : +- CometBroadcastExchange (12) + : : : +- CometFilter (11) + : : : +- CometScan parquet spark_catalog.default.item (10) + : : +- CometSort (38) + : : +- CometProject (37) + : : +- CometFilter (36) + : : +- CometHashAggregate (35) + : : +- CometExchange (34) + : : +- CometHashAggregate (33) + : : +- CometProject (32) + : : +- CometBroadcastHashJoin (31) + : : :- CometProject (27) + : : : +- CometFilter (26) + : : : +- CometScan parquet spark_catalog.default.store_sales (25) + : : +- CometBroadcastExchange (30) + : : +- CometFilter (29) + : : +- CometScan parquet spark_catalog.default.customer (28) + : +- CometBroadcastExchange (44) + : +- CometProject (43) + : +- CometFilter (42) + : +- CometScan parquet spark_catalog.default.date_dim (41) + +- CometProject (62) + +- CometBroadcastHashJoin (61) + :- CometProject (59) + : +- CometSortMergeJoin (58) + : :- CometSort (52) + : : +- CometExchange (51) + : : +- CometProject (50) + : : +- CometBroadcastHashJoin (49) + : : :- CometScan parquet spark_catalog.default.web_sales (47) + : : +- ReusedExchange (48) + : +- CometSort (57) + : +- CometProject (56) + : +- CometFilter (55) + : +- CometHashAggregate (54) + : +- ReusedExchange (53) + +- ReusedExchange (60) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +ReadSchema: struct + +(2) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(3) CometFilter +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9], [d_date_sk#8, d_date#9] + +(7) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: [d_date_sk#8, d_date#9] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_date#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] +Arguments: [ss_item_sk#6, d_date#9], [ss_item_sk#6, d_date#9] + +(10) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#11, i_item_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) CometFilter +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(12) CometBroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(13) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, d_date#9] +Right output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [ss_item_sk#6], [i_item_sk#11], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] +Arguments: [d_date#9, i_item_sk#11, _groupingexpression#13], [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] + +(15) CometHashAggregate +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] + +(16) CometExchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] + +(18) CometFilter +Input [2]: [item_sk#15, cnt#16] +Condition : (cnt#16 > 4) + +(19) CometProject +Input [2]: [item_sk#15, cnt#16] +Arguments: [item_sk#15], [item_sk#15] + +(20) CometBroadcastExchange +Input [1]: [item_sk#15] +Arguments: [item_sk#15] + +(21) CometBroadcastHashJoin +Left output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [item_sk#15] +Arguments: [cs_item_sk#2], [item_sk#15], LeftSemi, BuildRight + +(22) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(23) CometExchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1 ASC NULLS FIRST] + +(25) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(26) CometFilter +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Condition : isnotnull(ss_customer_sk#17) + +(27) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19], [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] + +(28) CometScan parquet spark_catalog.default.customer +Output [1]: [c_customer_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(29) CometFilter +Input [1]: [c_customer_sk#21] +Condition : isnotnull(c_customer_sk#21) + +(30) CometBroadcastExchange +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(31) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] +Right output [1]: [c_customer_sk#21] +Arguments: [ss_customer_sk#17], [c_customer_sk#21], Inner, BuildRight + +(32) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Arguments: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21], [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(33) CometHashAggregate +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(34) CometExchange +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(35) CometHashAggregate +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(36) CometFilter +Input [2]: [c_customer_sk#21, ssales#24] +Condition : (isnotnull(ssales#24) AND (cast(ssales#24 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#25, [id=#26]))) + +(37) CometProject +Input [2]: [c_customer_sk#21, ssales#24] +Arguments: [c_customer_sk#21], [c_customer_sk#21] + +(38) CometSort +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21], [c_customer_sk#21 ASC NULLS FIRST] + +(39) CometSortMergeJoin +Left output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [c_customer_sk#21] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#21], LeftSemi + +(40) CometProject +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(41) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#27, d_year#28, d_moy#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(42) CometFilter +Input [3]: [d_date_sk#27, d_year#28, d_moy#29] +Condition : ((((isnotnull(d_year#28) AND isnotnull(d_moy#29)) AND (d_year#28 = 2000)) AND (d_moy#29 = 2)) AND isnotnull(d_date_sk#27)) + +(43) CometProject +Input [3]: [d_date_sk#27, d_year#28, d_moy#29] +Arguments: [d_date_sk#27], [d_date_sk#27] + +(44) CometBroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: [d_date_sk#27] + +(45) CometBroadcastHashJoin +Left output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [d_date_sk#27] +Arguments: [cs_sold_date_sk#5], [d_date_sk#27], Inner, BuildRight + +(46) CometProject +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#27] +Arguments: [sales#30], [(cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4) AS sales#30] + +(47) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#35)] +ReadSchema: struct + +(48) ReusedExchange [Reuses operator id: 20] +Output [1]: [item_sk#36] + +(49) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Right output [1]: [item_sk#36] +Arguments: [ws_item_sk#31], [item_sk#36], LeftSemi, BuildRight + +(50) CometProject +Input [5]: [ws_item_sk#31, ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35], [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] + +(51) CometExchange +Input [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(52) CometSort +Input [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35], [ws_bill_customer_sk#32 ASC NULLS FIRST] + +(53) ReusedExchange [Reuses operator id: 34] +Output [3]: [c_customer_sk#37, sum#38, isEmpty#39] + +(54) CometHashAggregate +Input [3]: [c_customer_sk#37, sum#38, isEmpty#39] +Keys [1]: [c_customer_sk#37] +Functions [1]: [sum((cast(ss_quantity#40 as decimal(10,0)) * ss_sales_price#41))] + +(55) CometFilter +Input [2]: [c_customer_sk#37, ssales#42] +Condition : (isnotnull(ssales#42) AND (cast(ssales#42 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#25, [id=#26]))) + +(56) CometProject +Input [2]: [c_customer_sk#37, ssales#42] +Arguments: [c_customer_sk#37], [c_customer_sk#37] + +(57) CometSort +Input [1]: [c_customer_sk#37] +Arguments: [c_customer_sk#37], [c_customer_sk#37 ASC NULLS FIRST] + +(58) CometSortMergeJoin +Left output [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Right output [1]: [c_customer_sk#37] +Arguments: [ws_bill_customer_sk#32], [c_customer_sk#37], LeftSemi + +(59) CometProject +Input [4]: [ws_bill_customer_sk#32, ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Arguments: [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35], [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] + +(60) ReusedExchange [Reuses operator id: 44] +Output [1]: [d_date_sk#43] + +(61) CometBroadcastHashJoin +Left output [3]: [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35] +Right output [1]: [d_date_sk#43] +Arguments: [ws_sold_date_sk#35], [d_date_sk#43], Inner, BuildRight + +(62) CometProject +Input [4]: [ws_quantity#33, ws_list_price#34, ws_sold_date_sk#35, d_date_sk#43] +Arguments: [sales#44], [(cast(ws_quantity#33 as decimal(10,0)) * ws_list_price#34) AS sales#44] + +(63) CometUnion +Child 0 Input [1]: [sales#30] +Child 1 Input [1]: [sales#44] + +(64) CometHashAggregate +Input [1]: [sales#30] +Keys: [] +Functions [1]: [partial_sum(sales#30)] + +(65) CometExchange +Input [2]: [sum#45, isEmpty#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(66) CometHashAggregate +Input [2]: [sum#45, isEmpty#46] +Keys: [] +Functions [1]: [sum(sales#30)] + +(67) ColumnarToRow [codegen id : 1] +Input [1]: [sum(sales)#47] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 36 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* ColumnarToRow (85) ++- CometHashAggregate (84) + +- CometExchange (83) + +- CometHashAggregate (82) + +- CometHashAggregate (81) + +- CometExchange (80) + +- CometHashAggregate (79) + +- CometProject (78) + +- CometBroadcastHashJoin (77) + :- CometProject (72) + : +- CometBroadcastHashJoin (71) + : :- CometFilter (69) + : : +- CometScan parquet spark_catalog.default.store_sales (68) + : +- ReusedExchange (70) + +- CometBroadcastExchange (76) + +- CometProject (75) + +- CometFilter (74) + +- CometScan parquet spark_catalog.default.date_dim (73) + + +(68) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#51)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(69) CometFilter +Input [4]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] +Condition : isnotnull(ss_customer_sk#48) + +(70) ReusedExchange [Reuses operator id: 30] +Output [1]: [c_customer_sk#52] + +(71) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51] +Right output [1]: [c_customer_sk#52] +Arguments: [ss_customer_sk#48], [c_customer_sk#52], Inner, BuildRight + +(72) CometProject +Input [5]: [ss_customer_sk#48, ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52] +Arguments: [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52], [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52] + +(73) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#53, d_year#54] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(74) CometFilter +Input [2]: [d_date_sk#53, d_year#54] +Condition : (d_year#54 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#53)) + +(75) CometProject +Input [2]: [d_date_sk#53, d_year#54] +Arguments: [d_date_sk#53], [d_date_sk#53] + +(76) CometBroadcastExchange +Input [1]: [d_date_sk#53] +Arguments: [d_date_sk#53] + +(77) CometBroadcastHashJoin +Left output [4]: [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52] +Right output [1]: [d_date_sk#53] +Arguments: [ss_sold_date_sk#51], [d_date_sk#53], Inner, BuildRight + +(78) CometProject +Input [5]: [ss_quantity#49, ss_sales_price#50, ss_sold_date_sk#51, c_customer_sk#52, d_date_sk#53] +Arguments: [ss_quantity#49, ss_sales_price#50, c_customer_sk#52], [ss_quantity#49, ss_sales_price#50, c_customer_sk#52] + +(79) CometHashAggregate +Input [3]: [ss_quantity#49, ss_sales_price#50, c_customer_sk#52] +Keys [1]: [c_customer_sk#52] +Functions [1]: [partial_sum((cast(ss_quantity#49 as decimal(10,0)) * ss_sales_price#50))] + +(80) CometExchange +Input [3]: [c_customer_sk#52, sum#55, isEmpty#56] +Arguments: hashpartitioning(c_customer_sk#52, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(81) CometHashAggregate +Input [3]: [c_customer_sk#52, sum#55, isEmpty#56] +Keys [1]: [c_customer_sk#52] +Functions [1]: [sum((cast(ss_quantity#49 as decimal(10,0)) * ss_sales_price#50))] + +(82) CometHashAggregate +Input [1]: [csales#57] +Keys: [] +Functions [1]: [partial_max(csales#57)] + +(83) CometExchange +Input [1]: [max#58] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(84) CometHashAggregate +Input [1]: [max#58] +Keys: [] +Functions [1]: [max(csales#57)] + +(85) ColumnarToRow [codegen id : 1] +Input [1]: [tpcds_cmax#59] + +Subquery:2 Hosting operator id = 55 Hosting Expression = ReusedSubquery Subquery scalar-subquery#25, [id=#26] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1f3ec729e7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23a.native_iceberg_compat/simplified.txt @@ -0,0 +1,91 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [sum(sales),sum,isEmpty,sum(sales)] + CometExchange #1 + CometHashAggregate [sum,isEmpty,sales] + CometUnion [sales] + CometProject [cs_quantity,cs_list_price] [sales] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometProject [cs_quantity,cs_list_price,cs_sold_date_sk] + CometSortMergeJoin [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk,c_customer_sk] + CometSort [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometExchange [cs_bill_customer_sk] #2 + CometProject [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,item_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [item_sk] #3 + CometProject [item_sk] + CometFilter [item_sk,cnt] + CometHashAggregate [item_sk,cnt,_groupingexpression,i_item_sk,d_date,count,count(1)] + CometExchange [_groupingexpression,i_item_sk,d_date] #4 + CometHashAggregate [_groupingexpression,i_item_sk,d_date,count] + CometProject [i_item_desc] [d_date,i_item_sk,_groupingexpression] + CometBroadcastHashJoin [ss_item_sk,d_date,i_item_sk,i_item_desc] + CometProject [ss_item_sk,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_year] + CometBroadcastExchange [i_item_sk,i_item_desc] #6 + CometFilter [i_item_sk,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [tpcds_cmax,max,max(csales)] + CometExchange #9 + CometHashAggregate [max,csales] + CometHashAggregate [csales,c_customer_sk,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #10 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk,d_date_sk] + CometProject [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + ReusedExchange [c_customer_sk] #8 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #7 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,c_customer_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [c_customer_sk] #8 + CometFilter [c_customer_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk] + CometBroadcastExchange [d_date_sk] #12 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometProject [ws_quantity,ws_list_price] [sales] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometProject [ws_quantity,ws_list_price,ws_sold_date_sk] + CometSortMergeJoin [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,c_customer_sk] + CometSort [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometExchange [ws_bill_customer_sk] #13 + CometProject [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,item_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [item_sk] #3 + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + ReusedSubquery [tpcds_cmax] #1 + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/explain.txt new file mode 100644 index 0000000000..1611bcf325 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/explain.txt @@ -0,0 +1,558 @@ +== Physical Plan == +* ColumnarToRow (87) ++- CometTakeOrderedAndProject (86) + +- CometUnion (85) + :- CometHashAggregate (62) + : +- CometExchange (61) + : +- CometHashAggregate (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (53) + : : +- CometBroadcastHashJoin (52) + : : :- CometSortMergeJoin (40) + : : : :- CometSort (25) + : : : : +- CometExchange (24) + : : : : +- CometProject (23) + : : : : +- CometBroadcastHashJoin (22) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometBroadcastExchange (21) + : : : : +- CometProject (20) + : : : : +- CometFilter (19) + : : : : +- CometHashAggregate (18) + : : : : +- CometExchange (17) + : : : : +- CometHashAggregate (16) + : : : : +- CometProject (15) + : : : : +- CometBroadcastHashJoin (14) + : : : : :- CometProject (10) + : : : : : +- CometBroadcastHashJoin (9) + : : : : : :- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : : +- CometBroadcastExchange (8) + : : : : : +- CometProject (7) + : : : : : +- CometFilter (6) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (5) + : : : : +- CometBroadcastExchange (13) + : : : : +- CometFilter (12) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : +- CometSort (39) + : : : +- CometProject (38) + : : : +- CometFilter (37) + : : : +- CometHashAggregate (36) + : : : +- CometExchange (35) + : : : +- CometHashAggregate (34) + : : : +- CometProject (33) + : : : +- CometBroadcastHashJoin (32) + : : : :- CometProject (28) + : : : : +- CometFilter (27) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (26) + : : : +- CometBroadcastExchange (31) + : : : +- CometFilter (30) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (29) + : : +- CometBroadcastExchange (51) + : : +- CometSortMergeJoin (50) + : : :- CometSort (44) + : : : +- CometExchange (43) + : : : +- CometFilter (42) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (41) + : : +- CometSort (49) + : : +- CometProject (48) + : : +- CometFilter (47) + : : +- CometHashAggregate (46) + : : +- ReusedExchange (45) + : +- CometBroadcastExchange (57) + : +- CometProject (56) + : +- CometFilter (55) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (54) + +- CometHashAggregate (84) + +- CometExchange (83) + +- CometHashAggregate (82) + +- CometProject (81) + +- CometBroadcastHashJoin (80) + :- CometProject (78) + : +- CometBroadcastHashJoin (77) + : :- CometSortMergeJoin (75) + : : :- CometSort (69) + : : : +- CometExchange (68) + : : : +- CometProject (67) + : : : +- CometBroadcastHashJoin (66) + : : : :- CometFilter (64) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (63) + : : : +- ReusedExchange (65) + : : +- CometSort (74) + : : +- CometProject (73) + : : +- CometFilter (72) + : : +- CometHashAggregate (71) + : : +- ReusedExchange (70) + : +- ReusedExchange (76) + +- ReusedExchange (79) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_bill_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(5) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9, d_year#10] + +(6) CometFilter +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(7) CometProject +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9], [d_date_sk#8, d_date#9] + +(8) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: [d_date_sk#8, d_date#9] + +(9) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_date#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(10) CometProject +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] +Arguments: [ss_item_sk#6, d_date#9], [ss_item_sk#6, d_date#9] + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(12) CometFilter +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(13) CometBroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, d_date#9] +Right output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [ss_item_sk#6], [i_item_sk#11], Inner, BuildRight + +(15) CometProject +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] +Arguments: [d_date#9, i_item_sk#11, _groupingexpression#13], [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] + +(16) CometHashAggregate +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] + +(17) CometExchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] + +(19) CometFilter +Input [2]: [item_sk#15, cnt#16] +Condition : (cnt#16 > 4) + +(20) CometProject +Input [2]: [item_sk#15, cnt#16] +Arguments: [item_sk#15], [item_sk#15] + +(21) CometBroadcastExchange +Input [1]: [item_sk#15] +Arguments: [item_sk#15] + +(22) CometBroadcastHashJoin +Left output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [item_sk#15] +Arguments: [cs_item_sk#2], [item_sk#15], LeftSemi, BuildRight + +(23) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(24) CometExchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(25) CometSort +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1 ASC NULLS FIRST] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] + +(27) CometFilter +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Condition : isnotnull(ss_customer_sk#17) + +(28) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19], [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] + +(29) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(30) CometFilter +Input [1]: [c_customer_sk#21] +Condition : isnotnull(c_customer_sk#21) + +(31) CometBroadcastExchange +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(32) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] +Right output [1]: [c_customer_sk#21] +Arguments: [ss_customer_sk#17], [c_customer_sk#21], Inner, BuildRight + +(33) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Arguments: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21], [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(34) CometHashAggregate +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(35) CometExchange +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(36) CometHashAggregate +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(37) CometFilter +Input [2]: [c_customer_sk#21, ssales#24] +Condition : (isnotnull(ssales#24) AND (cast(ssales#24 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#25, [id=#26]))) + +(38) CometProject +Input [2]: [c_customer_sk#21, ssales#24] +Arguments: [c_customer_sk#21], [c_customer_sk#21] + +(39) CometSort +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21], [c_customer_sk#21 ASC NULLS FIRST] + +(40) CometSortMergeJoin +Left output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [c_customer_sk#21] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#21], LeftSemi + +(41) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [c_customer_sk#27, c_first_name#28, c_last_name#29] + +(42) CometFilter +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Condition : isnotnull(c_customer_sk#27) + +(43) CometExchange +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: hashpartitioning(c_customer_sk#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(44) CometSort +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [c_customer_sk#27, c_first_name#28, c_last_name#29], [c_customer_sk#27 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 35] +Output [3]: [c_customer_sk#21, sum#22, isEmpty#23] + +(46) CometHashAggregate +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(47) CometFilter +Input [2]: [c_customer_sk#21, ssales#24] +Condition : (isnotnull(ssales#24) AND (cast(ssales#24 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#25, [id=#26]))) + +(48) CometProject +Input [2]: [c_customer_sk#21, ssales#24] +Arguments: [c_customer_sk#21], [c_customer_sk#21] + +(49) CometSort +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21], [c_customer_sk#21 ASC NULLS FIRST] + +(50) CometSortMergeJoin +Left output [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Right output [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#27], [c_customer_sk#21], LeftSemi + +(51) CometBroadcastExchange +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [c_customer_sk#27, c_first_name#28, c_last_name#29] + +(52) CometBroadcastHashJoin +Left output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#27], Inner, BuildRight + +(53) CometProject +Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29], [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29] + +(54) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#30, d_year#31, d_moy#32] +Arguments: [d_date_sk#30, d_year#31, d_moy#32] + +(55) CometFilter +Input [3]: [d_date_sk#30, d_year#31, d_moy#32] +Condition : ((((isnotnull(d_year#31) AND isnotnull(d_moy#32)) AND (d_year#31 = 2000)) AND (d_moy#32 = 2)) AND isnotnull(d_date_sk#30)) + +(56) CometProject +Input [3]: [d_date_sk#30, d_year#31, d_moy#32] +Arguments: [d_date_sk#30], [d_date_sk#30] + +(57) CometBroadcastExchange +Input [1]: [d_date_sk#30] +Arguments: [d_date_sk#30] + +(58) CometBroadcastHashJoin +Left output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29] +Right output [1]: [d_date_sk#30] +Arguments: [cs_sold_date_sk#5], [d_date_sk#30], Inner, BuildRight + +(59) CometProject +Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29, d_date_sk#30] +Arguments: [cs_quantity#3, cs_list_price#4, c_first_name#28, c_last_name#29], [cs_quantity#3, cs_list_price#4, c_first_name#28, c_last_name#29] + +(60) CometHashAggregate +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#28, c_last_name#29] +Keys [2]: [c_last_name#29, c_first_name#28] +Functions [1]: [partial_sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] + +(61) CometExchange +Input [4]: [c_last_name#29, c_first_name#28, sum#33, isEmpty#34] +Arguments: hashpartitioning(c_last_name#29, c_first_name#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(62) CometHashAggregate +Input [4]: [c_last_name#29, c_first_name#28, sum#33, isEmpty#34] +Keys [2]: [c_last_name#29, c_first_name#28] +Functions [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] + +(63) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] + +(64) CometFilter +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Condition : isnotnull(ws_bill_customer_sk#36) + +(65) ReusedExchange [Reuses operator id: 21] +Output [1]: [item_sk#40] + +(66) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Right output [1]: [item_sk#40] +Arguments: [ws_item_sk#35], [item_sk#40], LeftSemi, BuildRight + +(67) CometProject +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39], [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] + +(68) CometExchange +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: hashpartitioning(ws_bill_customer_sk#36, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(69) CometSort +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39], [ws_bill_customer_sk#36 ASC NULLS FIRST] + +(70) ReusedExchange [Reuses operator id: 35] +Output [3]: [c_customer_sk#41, sum#42, isEmpty#43] + +(71) CometHashAggregate +Input [3]: [c_customer_sk#41, sum#42, isEmpty#43] +Keys [1]: [c_customer_sk#41] +Functions [1]: [sum((cast(ss_quantity#44 as decimal(10,0)) * ss_sales_price#45))] + +(72) CometFilter +Input [2]: [c_customer_sk#41, ssales#46] +Condition : (isnotnull(ssales#46) AND (cast(ssales#46 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#25, [id=#26]))) + +(73) CometProject +Input [2]: [c_customer_sk#41, ssales#46] +Arguments: [c_customer_sk#41], [c_customer_sk#41] + +(74) CometSort +Input [1]: [c_customer_sk#41] +Arguments: [c_customer_sk#41], [c_customer_sk#41 ASC NULLS FIRST] + +(75) CometSortMergeJoin +Left output [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Right output [1]: [c_customer_sk#41] +Arguments: [ws_bill_customer_sk#36], [c_customer_sk#41], LeftSemi + +(76) ReusedExchange [Reuses operator id: 51] +Output [3]: [c_customer_sk#47, c_first_name#48, c_last_name#49] + +(77) CometBroadcastHashJoin +Left output [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Right output [3]: [c_customer_sk#47, c_first_name#48, c_last_name#49] +Arguments: [ws_bill_customer_sk#36], [c_customer_sk#47], Inner, BuildRight + +(78) CometProject +Input [7]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_customer_sk#47, c_first_name#48, c_last_name#49] +Arguments: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49], [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49] + +(79) ReusedExchange [Reuses operator id: 57] +Output [1]: [d_date_sk#50] + +(80) CometBroadcastHashJoin +Left output [5]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49] +Right output [1]: [d_date_sk#50] +Arguments: [ws_sold_date_sk#39], [d_date_sk#50], Inner, BuildRight + +(81) CometProject +Input [6]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49, d_date_sk#50] +Arguments: [ws_quantity#37, ws_list_price#38, c_first_name#48, c_last_name#49], [ws_quantity#37, ws_list_price#38, c_first_name#48, c_last_name#49] + +(82) CometHashAggregate +Input [4]: [ws_quantity#37, ws_list_price#38, c_first_name#48, c_last_name#49] +Keys [2]: [c_last_name#49, c_first_name#48] +Functions [1]: [partial_sum((cast(ws_quantity#37 as decimal(10,0)) * ws_list_price#38))] + +(83) CometExchange +Input [4]: [c_last_name#49, c_first_name#48, sum#51, isEmpty#52] +Arguments: hashpartitioning(c_last_name#49, c_first_name#48, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(84) CometHashAggregate +Input [4]: [c_last_name#49, c_first_name#48, sum#51, isEmpty#52] +Keys [2]: [c_last_name#49, c_first_name#48] +Functions [1]: [sum((cast(ws_quantity#37 as decimal(10,0)) * ws_list_price#38))] + +(85) CometUnion +Child 0 Input [3]: [c_last_name#29, c_first_name#28, sales#53] +Child 1 Input [3]: [c_last_name#49, c_first_name#48, sales#54] + +(86) CometTakeOrderedAndProject +Input [3]: [c_last_name#29, c_first_name#28, sales#53] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#29 ASC NULLS FIRST,c_first_name#28 ASC NULLS FIRST,sales#53 ASC NULLS FIRST], output=[c_last_name#29,c_first_name#28,sales#53]), [c_last_name#29, c_first_name#28, sales#53], 100, [c_last_name#29 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, sales#53 ASC NULLS FIRST], [c_last_name#29, c_first_name#28, sales#53] + +(87) ColumnarToRow [codegen id : 1] +Input [3]: [c_last_name#29, c_first_name#28, sales#53] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 37 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* ColumnarToRow (105) ++- CometHashAggregate (104) + +- CometExchange (103) + +- CometHashAggregate (102) + +- CometHashAggregate (101) + +- CometExchange (100) + +- CometHashAggregate (99) + +- CometProject (98) + +- CometBroadcastHashJoin (97) + :- CometProject (92) + : +- CometBroadcastHashJoin (91) + : :- CometFilter (89) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (88) + : +- ReusedExchange (90) + +- CometBroadcastExchange (96) + +- CometProject (95) + +- CometFilter (94) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (93) + + +(88) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Arguments: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] + +(89) CometFilter +Input [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Condition : isnotnull(ss_customer_sk#55) + +(90) ReusedExchange [Reuses operator id: 31] +Output [1]: [c_customer_sk#59] + +(91) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Right output [1]: [c_customer_sk#59] +Arguments: [ss_customer_sk#55], [c_customer_sk#59], Inner, BuildRight + +(92) CometProject +Input [5]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59] +Arguments: [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59], [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59] + +(93) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#60, d_year#61] +Arguments: [d_date_sk#60, d_year#61] + +(94) CometFilter +Input [2]: [d_date_sk#60, d_year#61] +Condition : (d_year#61 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#60)) + +(95) CometProject +Input [2]: [d_date_sk#60, d_year#61] +Arguments: [d_date_sk#60], [d_date_sk#60] + +(96) CometBroadcastExchange +Input [1]: [d_date_sk#60] +Arguments: [d_date_sk#60] + +(97) CometBroadcastHashJoin +Left output [4]: [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59] +Right output [1]: [d_date_sk#60] +Arguments: [ss_sold_date_sk#58], [d_date_sk#60], Inner, BuildRight + +(98) CometProject +Input [5]: [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59, d_date_sk#60] +Arguments: [ss_quantity#56, ss_sales_price#57, c_customer_sk#59], [ss_quantity#56, ss_sales_price#57, c_customer_sk#59] + +(99) CometHashAggregate +Input [3]: [ss_quantity#56, ss_sales_price#57, c_customer_sk#59] +Keys [1]: [c_customer_sk#59] +Functions [1]: [partial_sum((cast(ss_quantity#56 as decimal(10,0)) * ss_sales_price#57))] + +(100) CometExchange +Input [3]: [c_customer_sk#59, sum#62, isEmpty#63] +Arguments: hashpartitioning(c_customer_sk#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(101) CometHashAggregate +Input [3]: [c_customer_sk#59, sum#62, isEmpty#63] +Keys [1]: [c_customer_sk#59] +Functions [1]: [sum((cast(ss_quantity#56 as decimal(10,0)) * ss_sales_price#57))] + +(102) CometHashAggregate +Input [1]: [csales#64] +Keys: [] +Functions [1]: [partial_max(csales#64)] + +(103) CometExchange +Input [1]: [max#65] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(104) CometHashAggregate +Input [1]: [max#65] +Keys: [] +Functions [1]: [max(csales#64)] + +(105) ColumnarToRow [codegen id : 1] +Input [1]: [tpcds_cmax#66] + +Subquery:2 Hosting operator id = 47 Hosting Expression = ReusedSubquery Subquery scalar-subquery#25, [id=#26] + +Subquery:3 Hosting operator id = 72 Hosting Expression = ReusedSubquery Subquery scalar-subquery#25, [id=#26] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..85aa2ac471 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_datafusion/simplified.txt @@ -0,0 +1,112 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,sales] + CometUnion [c_last_name,c_first_name,sales] + CometHashAggregate [c_last_name,c_first_name,sales,sum,isEmpty,sum((cast(cs_quantity as decimal(10,0)) * cs_list_price))] + CometExchange [c_last_name,c_first_name] #1 + CometHashAggregate [c_last_name,c_first_name,sum,isEmpty,cs_quantity,cs_list_price] + CometProject [cs_quantity,cs_list_price,c_first_name,c_last_name] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name,d_date_sk] + CometProject [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk,c_customer_sk,c_first_name,c_last_name] + CometSortMergeJoin [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk,c_customer_sk] + CometSort [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometExchange [cs_bill_customer_sk] #2 + CometProject [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,item_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [item_sk] #3 + CometProject [item_sk] + CometFilter [item_sk,cnt] + CometHashAggregate [item_sk,cnt,_groupingexpression,i_item_sk,d_date,count,count(1)] + CometExchange [_groupingexpression,i_item_sk,d_date] #4 + CometHashAggregate [_groupingexpression,i_item_sk,d_date,count] + CometProject [i_item_desc] [d_date,i_item_sk,_groupingexpression] + CometBroadcastHashJoin [ss_item_sk,d_date,i_item_sk,i_item_desc] + CometProject [ss_item_sk,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_year] + CometBroadcastExchange [i_item_sk,i_item_desc] #6 + CometFilter [i_item_sk,i_item_desc] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_desc] + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [tpcds_cmax,max,max(csales)] + CometExchange #9 + CometHashAggregate [max,csales] + CometHashAggregate [csales,c_customer_sk,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #10 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk,d_date_sk] + CometProject [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + ReusedExchange [c_customer_sk] #8 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #7 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,c_customer_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [c_customer_sk] #8 + CometFilter [c_customer_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #12 + CometSortMergeJoin [c_customer_sk,c_first_name,c_last_name,c_customer_sk] + CometSort [c_customer_sk,c_first_name,c_last_name] + CometExchange [c_customer_sk] #13 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + ReusedSubquery [tpcds_cmax] #1 + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + CometBroadcastExchange [d_date_sk] #14 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometHashAggregate [c_last_name,c_first_name,sales,sum,isEmpty,sum((cast(ws_quantity as decimal(10,0)) * ws_list_price))] + CometExchange [c_last_name,c_first_name] #15 + CometHashAggregate [c_last_name,c_first_name,sum,isEmpty,ws_quantity,ws_list_price] + CometProject [ws_quantity,ws_list_price,c_first_name,c_last_name] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name,d_date_sk] + CometProject [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,c_customer_sk,c_first_name,c_last_name] + CometSortMergeJoin [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,c_customer_sk] + CometSort [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometExchange [ws_bill_customer_sk] #16 + CometProject [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,item_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [item_sk] #3 + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + ReusedSubquery [tpcds_cmax] #1 + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #12 + ReusedExchange [d_date_sk] #14 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..07b298afcd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/explain.txt @@ -0,0 +1,595 @@ +== Physical Plan == +* ColumnarToRow (87) ++- CometTakeOrderedAndProject (86) + +- CometUnion (85) + :- CometHashAggregate (62) + : +- CometExchange (61) + : +- CometHashAggregate (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (53) + : : +- CometBroadcastHashJoin (52) + : : :- CometSortMergeJoin (40) + : : : :- CometSort (25) + : : : : +- CometExchange (24) + : : : : +- CometProject (23) + : : : : +- CometBroadcastHashJoin (22) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : +- CometBroadcastExchange (21) + : : : : +- CometProject (20) + : : : : +- CometFilter (19) + : : : : +- CometHashAggregate (18) + : : : : +- CometExchange (17) + : : : : +- CometHashAggregate (16) + : : : : +- CometProject (15) + : : : : +- CometBroadcastHashJoin (14) + : : : : :- CometProject (10) + : : : : : +- CometBroadcastHashJoin (9) + : : : : : :- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : : : +- CometBroadcastExchange (8) + : : : : : +- CometProject (7) + : : : : : +- CometFilter (6) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (5) + : : : : +- CometBroadcastExchange (13) + : : : : +- CometFilter (12) + : : : : +- CometScan parquet spark_catalog.default.item (11) + : : : +- CometSort (39) + : : : +- CometProject (38) + : : : +- CometFilter (37) + : : : +- CometHashAggregate (36) + : : : +- CometExchange (35) + : : : +- CometHashAggregate (34) + : : : +- CometProject (33) + : : : +- CometBroadcastHashJoin (32) + : : : :- CometProject (28) + : : : : +- CometFilter (27) + : : : : +- CometScan parquet spark_catalog.default.store_sales (26) + : : : +- CometBroadcastExchange (31) + : : : +- CometFilter (30) + : : : +- CometScan parquet spark_catalog.default.customer (29) + : : +- CometBroadcastExchange (51) + : : +- CometSortMergeJoin (50) + : : :- CometSort (44) + : : : +- CometExchange (43) + : : : +- CometFilter (42) + : : : +- CometScan parquet spark_catalog.default.customer (41) + : : +- CometSort (49) + : : +- CometProject (48) + : : +- CometFilter (47) + : : +- CometHashAggregate (46) + : : +- ReusedExchange (45) + : +- CometBroadcastExchange (57) + : +- CometProject (56) + : +- CometFilter (55) + : +- CometScan parquet spark_catalog.default.date_dim (54) + +- CometHashAggregate (84) + +- CometExchange (83) + +- CometHashAggregate (82) + +- CometProject (81) + +- CometBroadcastHashJoin (80) + :- CometProject (78) + : +- CometBroadcastHashJoin (77) + : :- CometSortMergeJoin (75) + : : :- CometSort (69) + : : : +- CometExchange (68) + : : : +- CometProject (67) + : : : +- CometBroadcastHashJoin (66) + : : : :- CometFilter (64) + : : : : +- CometScan parquet spark_catalog.default.web_sales (63) + : : : +- ReusedExchange (65) + : : +- CometSort (74) + : : +- CometProject (73) + : : +- CometFilter (72) + : : +- CometHashAggregate (71) + : : +- ReusedExchange (70) + : +- ReusedExchange (76) + +- ReusedExchange (79) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_bill_customer_sk#1) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(5) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(6) CometFilter +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(7) CometProject +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9], [d_date_sk#8, d_date#9] + +(8) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: [d_date_sk#8, d_date#9] + +(9) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_date#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(10) CometProject +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] +Arguments: [ss_item_sk#6, d_date#9], [ss_item_sk#6, d_date#9] + +(11) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#11, i_item_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) CometFilter +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(13) CometBroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, d_date#9] +Right output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [ss_item_sk#6], [i_item_sk#11], Inner, BuildRight + +(15) CometProject +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] +Arguments: [d_date#9, i_item_sk#11, _groupingexpression#13], [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] + +(16) CometHashAggregate +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] + +(17) CometExchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] + +(19) CometFilter +Input [2]: [item_sk#15, cnt#16] +Condition : (cnt#16 > 4) + +(20) CometProject +Input [2]: [item_sk#15, cnt#16] +Arguments: [item_sk#15], [item_sk#15] + +(21) CometBroadcastExchange +Input [1]: [item_sk#15] +Arguments: [item_sk#15] + +(22) CometBroadcastHashJoin +Left output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [item_sk#15] +Arguments: [cs_item_sk#2], [item_sk#15], LeftSemi, BuildRight + +(23) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(24) CometExchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(25) CometSort +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1 ASC NULLS FIRST] + +(26) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(27) CometFilter +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Condition : isnotnull(ss_customer_sk#17) + +(28) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19], [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] + +(29) CometScan parquet spark_catalog.default.customer +Output [1]: [c_customer_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) CometFilter +Input [1]: [c_customer_sk#21] +Condition : isnotnull(c_customer_sk#21) + +(31) CometBroadcastExchange +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(32) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] +Right output [1]: [c_customer_sk#21] +Arguments: [ss_customer_sk#17], [c_customer_sk#21], Inner, BuildRight + +(33) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Arguments: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21], [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(34) CometHashAggregate +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(35) CometExchange +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(36) CometHashAggregate +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(37) CometFilter +Input [2]: [c_customer_sk#21, ssales#24] +Condition : (isnotnull(ssales#24) AND (cast(ssales#24 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#25, [id=#26]))) + +(38) CometProject +Input [2]: [c_customer_sk#21, ssales#24] +Arguments: [c_customer_sk#21], [c_customer_sk#21] + +(39) CometSort +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21], [c_customer_sk#21 ASC NULLS FIRST] + +(40) CometSortMergeJoin +Left output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [c_customer_sk#21] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#21], LeftSemi + +(41) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(42) CometFilter +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Condition : isnotnull(c_customer_sk#27) + +(43) CometExchange +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: hashpartitioning(c_customer_sk#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(44) CometSort +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [c_customer_sk#27, c_first_name#28, c_last_name#29], [c_customer_sk#27 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 35] +Output [3]: [c_customer_sk#21, sum#22, isEmpty#23] + +(46) CometHashAggregate +Input [3]: [c_customer_sk#21, sum#22, isEmpty#23] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] + +(47) CometFilter +Input [2]: [c_customer_sk#21, ssales#24] +Condition : (isnotnull(ssales#24) AND (cast(ssales#24 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#25, [id=#26]))) + +(48) CometProject +Input [2]: [c_customer_sk#21, ssales#24] +Arguments: [c_customer_sk#21], [c_customer_sk#21] + +(49) CometSort +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21], [c_customer_sk#21 ASC NULLS FIRST] + +(50) CometSortMergeJoin +Left output [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Right output [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#27], [c_customer_sk#21], LeftSemi + +(51) CometBroadcastExchange +Input [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [c_customer_sk#27, c_first_name#28, c_last_name#29] + +(52) CometBroadcastHashJoin +Left output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [3]: [c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#27], Inner, BuildRight + +(53) CometProject +Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#27, c_first_name#28, c_last_name#29] +Arguments: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29], [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29] + +(54) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#30, d_year#31, d_moy#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(55) CometFilter +Input [3]: [d_date_sk#30, d_year#31, d_moy#32] +Condition : ((((isnotnull(d_year#31) AND isnotnull(d_moy#32)) AND (d_year#31 = 2000)) AND (d_moy#32 = 2)) AND isnotnull(d_date_sk#30)) + +(56) CometProject +Input [3]: [d_date_sk#30, d_year#31, d_moy#32] +Arguments: [d_date_sk#30], [d_date_sk#30] + +(57) CometBroadcastExchange +Input [1]: [d_date_sk#30] +Arguments: [d_date_sk#30] + +(58) CometBroadcastHashJoin +Left output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29] +Right output [1]: [d_date_sk#30] +Arguments: [cs_sold_date_sk#5], [d_date_sk#30], Inner, BuildRight + +(59) CometProject +Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#28, c_last_name#29, d_date_sk#30] +Arguments: [cs_quantity#3, cs_list_price#4, c_first_name#28, c_last_name#29], [cs_quantity#3, cs_list_price#4, c_first_name#28, c_last_name#29] + +(60) CometHashAggregate +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#28, c_last_name#29] +Keys [2]: [c_last_name#29, c_first_name#28] +Functions [1]: [partial_sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] + +(61) CometExchange +Input [4]: [c_last_name#29, c_first_name#28, sum#33, isEmpty#34] +Arguments: hashpartitioning(c_last_name#29, c_first_name#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(62) CometHashAggregate +Input [4]: [c_last_name#29, c_first_name#28, sum#33, isEmpty#34] +Keys [2]: [c_last_name#29, c_first_name#28] +Functions [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] + +(63) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#39)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(64) CometFilter +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Condition : isnotnull(ws_bill_customer_sk#36) + +(65) ReusedExchange [Reuses operator id: 21] +Output [1]: [item_sk#40] + +(66) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Right output [1]: [item_sk#40] +Arguments: [ws_item_sk#35], [item_sk#40], LeftSemi, BuildRight + +(67) CometProject +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39], [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] + +(68) CometExchange +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: hashpartitioning(ws_bill_customer_sk#36, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(69) CometSort +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39], [ws_bill_customer_sk#36 ASC NULLS FIRST] + +(70) ReusedExchange [Reuses operator id: 35] +Output [3]: [c_customer_sk#41, sum#42, isEmpty#43] + +(71) CometHashAggregate +Input [3]: [c_customer_sk#41, sum#42, isEmpty#43] +Keys [1]: [c_customer_sk#41] +Functions [1]: [sum((cast(ss_quantity#44 as decimal(10,0)) * ss_sales_price#45))] + +(72) CometFilter +Input [2]: [c_customer_sk#41, ssales#46] +Condition : (isnotnull(ssales#46) AND (cast(ssales#46 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#25, [id=#26]))) + +(73) CometProject +Input [2]: [c_customer_sk#41, ssales#46] +Arguments: [c_customer_sk#41], [c_customer_sk#41] + +(74) CometSort +Input [1]: [c_customer_sk#41] +Arguments: [c_customer_sk#41], [c_customer_sk#41 ASC NULLS FIRST] + +(75) CometSortMergeJoin +Left output [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Right output [1]: [c_customer_sk#41] +Arguments: [ws_bill_customer_sk#36], [c_customer_sk#41], LeftSemi + +(76) ReusedExchange [Reuses operator id: 51] +Output [3]: [c_customer_sk#47, c_first_name#48, c_last_name#49] + +(77) CometBroadcastHashJoin +Left output [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Right output [3]: [c_customer_sk#47, c_first_name#48, c_last_name#49] +Arguments: [ws_bill_customer_sk#36], [c_customer_sk#47], Inner, BuildRight + +(78) CometProject +Input [7]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_customer_sk#47, c_first_name#48, c_last_name#49] +Arguments: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49], [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49] + +(79) ReusedExchange [Reuses operator id: 57] +Output [1]: [d_date_sk#50] + +(80) CometBroadcastHashJoin +Left output [5]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49] +Right output [1]: [d_date_sk#50] +Arguments: [ws_sold_date_sk#39], [d_date_sk#50], Inner, BuildRight + +(81) CometProject +Input [6]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, c_first_name#48, c_last_name#49, d_date_sk#50] +Arguments: [ws_quantity#37, ws_list_price#38, c_first_name#48, c_last_name#49], [ws_quantity#37, ws_list_price#38, c_first_name#48, c_last_name#49] + +(82) CometHashAggregate +Input [4]: [ws_quantity#37, ws_list_price#38, c_first_name#48, c_last_name#49] +Keys [2]: [c_last_name#49, c_first_name#48] +Functions [1]: [partial_sum((cast(ws_quantity#37 as decimal(10,0)) * ws_list_price#38))] + +(83) CometExchange +Input [4]: [c_last_name#49, c_first_name#48, sum#51, isEmpty#52] +Arguments: hashpartitioning(c_last_name#49, c_first_name#48, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(84) CometHashAggregate +Input [4]: [c_last_name#49, c_first_name#48, sum#51, isEmpty#52] +Keys [2]: [c_last_name#49, c_first_name#48] +Functions [1]: [sum((cast(ws_quantity#37 as decimal(10,0)) * ws_list_price#38))] + +(85) CometUnion +Child 0 Input [3]: [c_last_name#29, c_first_name#28, sales#53] +Child 1 Input [3]: [c_last_name#49, c_first_name#48, sales#54] + +(86) CometTakeOrderedAndProject +Input [3]: [c_last_name#29, c_first_name#28, sales#53] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#29 ASC NULLS FIRST,c_first_name#28 ASC NULLS FIRST,sales#53 ASC NULLS FIRST], output=[c_last_name#29,c_first_name#28,sales#53]), [c_last_name#29, c_first_name#28, sales#53], 100, [c_last_name#29 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, sales#53 ASC NULLS FIRST], [c_last_name#29, c_first_name#28, sales#53] + +(87) ColumnarToRow [codegen id : 1] +Input [3]: [c_last_name#29, c_first_name#28, sales#53] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 37 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* ColumnarToRow (105) ++- CometHashAggregate (104) + +- CometExchange (103) + +- CometHashAggregate (102) + +- CometHashAggregate (101) + +- CometExchange (100) + +- CometHashAggregate (99) + +- CometProject (98) + +- CometBroadcastHashJoin (97) + :- CometProject (92) + : +- CometBroadcastHashJoin (91) + : :- CometFilter (89) + : : +- CometScan parquet spark_catalog.default.store_sales (88) + : +- ReusedExchange (90) + +- CometBroadcastExchange (96) + +- CometProject (95) + +- CometFilter (94) + +- CometScan parquet spark_catalog.default.date_dim (93) + + +(88) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#58)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(89) CometFilter +Input [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Condition : isnotnull(ss_customer_sk#55) + +(90) ReusedExchange [Reuses operator id: 31] +Output [1]: [c_customer_sk#59] + +(91) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Right output [1]: [c_customer_sk#59] +Arguments: [ss_customer_sk#55], [c_customer_sk#59], Inner, BuildRight + +(92) CometProject +Input [5]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59] +Arguments: [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59], [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59] + +(93) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#60, d_year#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(94) CometFilter +Input [2]: [d_date_sk#60, d_year#61] +Condition : (d_year#61 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#60)) + +(95) CometProject +Input [2]: [d_date_sk#60, d_year#61] +Arguments: [d_date_sk#60], [d_date_sk#60] + +(96) CometBroadcastExchange +Input [1]: [d_date_sk#60] +Arguments: [d_date_sk#60] + +(97) CometBroadcastHashJoin +Left output [4]: [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59] +Right output [1]: [d_date_sk#60] +Arguments: [ss_sold_date_sk#58], [d_date_sk#60], Inner, BuildRight + +(98) CometProject +Input [5]: [ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, c_customer_sk#59, d_date_sk#60] +Arguments: [ss_quantity#56, ss_sales_price#57, c_customer_sk#59], [ss_quantity#56, ss_sales_price#57, c_customer_sk#59] + +(99) CometHashAggregate +Input [3]: [ss_quantity#56, ss_sales_price#57, c_customer_sk#59] +Keys [1]: [c_customer_sk#59] +Functions [1]: [partial_sum((cast(ss_quantity#56 as decimal(10,0)) * ss_sales_price#57))] + +(100) CometExchange +Input [3]: [c_customer_sk#59, sum#62, isEmpty#63] +Arguments: hashpartitioning(c_customer_sk#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(101) CometHashAggregate +Input [3]: [c_customer_sk#59, sum#62, isEmpty#63] +Keys [1]: [c_customer_sk#59] +Functions [1]: [sum((cast(ss_quantity#56 as decimal(10,0)) * ss_sales_price#57))] + +(102) CometHashAggregate +Input [1]: [csales#64] +Keys: [] +Functions [1]: [partial_max(csales#64)] + +(103) CometExchange +Input [1]: [max#65] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(104) CometHashAggregate +Input [1]: [max#65] +Keys: [] +Functions [1]: [max(csales#64)] + +(105) ColumnarToRow [codegen id : 1] +Input [1]: [tpcds_cmax#66] + +Subquery:2 Hosting operator id = 47 Hosting Expression = ReusedSubquery Subquery scalar-subquery#25, [id=#26] + +Subquery:3 Hosting operator id = 72 Hosting Expression = ReusedSubquery Subquery scalar-subquery#25, [id=#26] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3784e93301 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q23b.native_iceberg_compat/simplified.txt @@ -0,0 +1,112 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,sales] + CometUnion [c_last_name,c_first_name,sales] + CometHashAggregate [c_last_name,c_first_name,sales,sum,isEmpty,sum((cast(cs_quantity as decimal(10,0)) * cs_list_price))] + CometExchange [c_last_name,c_first_name] #1 + CometHashAggregate [c_last_name,c_first_name,sum,isEmpty,cs_quantity,cs_list_price] + CometProject [cs_quantity,cs_list_price,c_first_name,c_last_name] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name,d_date_sk] + CometProject [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk,c_customer_sk,c_first_name,c_last_name] + CometSortMergeJoin [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk,c_customer_sk] + CometSort [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometExchange [cs_bill_customer_sk] #2 + CometProject [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,item_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [item_sk] #3 + CometProject [item_sk] + CometFilter [item_sk,cnt] + CometHashAggregate [item_sk,cnt,_groupingexpression,i_item_sk,d_date,count,count(1)] + CometExchange [_groupingexpression,i_item_sk,d_date] #4 + CometHashAggregate [_groupingexpression,i_item_sk,d_date,count] + CometProject [i_item_desc] [d_date,i_item_sk,_groupingexpression] + CometBroadcastHashJoin [ss_item_sk,d_date,i_item_sk,i_item_desc] + CometProject [ss_item_sk,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_year] + CometBroadcastExchange [i_item_sk,i_item_desc] #6 + CometFilter [i_item_sk,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [tpcds_cmax,max,max(csales)] + CometExchange #9 + CometHashAggregate [max,csales] + CometHashAggregate [csales,c_customer_sk,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #10 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk,d_date_sk] + CometProject [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + ReusedExchange [c_customer_sk] #8 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + CometExchange [c_customer_sk] #7 + CometHashAggregate [c_customer_sk,sum,isEmpty,ss_quantity,ss_sales_price] + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,c_customer_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [c_customer_sk] #8 + CometFilter [c_customer_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #12 + CometSortMergeJoin [c_customer_sk,c_first_name,c_last_name,c_customer_sk] + CometSort [c_customer_sk,c_first_name,c_last_name] + CometExchange [c_customer_sk] #13 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + ReusedSubquery [tpcds_cmax] #1 + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + CometBroadcastExchange [d_date_sk] #14 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometHashAggregate [c_last_name,c_first_name,sales,sum,isEmpty,sum((cast(ws_quantity as decimal(10,0)) * ws_list_price))] + CometExchange [c_last_name,c_first_name] #15 + CometHashAggregate [c_last_name,c_first_name,sum,isEmpty,ws_quantity,ws_list_price] + CometProject [ws_quantity,ws_list_price,c_first_name,c_last_name] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name,d_date_sk] + CometProject [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,c_customer_sk,c_first_name,c_last_name] + CometSortMergeJoin [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,c_customer_sk] + CometSort [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometExchange [ws_bill_customer_sk] #16 + CometProject [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,item_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [item_sk] #3 + CometSort [c_customer_sk] + CometProject [c_customer_sk] + CometFilter [c_customer_sk,ssales] + ReusedSubquery [tpcds_cmax] #1 + CometHashAggregate [c_customer_sk,ssales,sum,isEmpty,sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price))] + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #12 + ReusedExchange [d_date_sk] #14 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/explain.txt new file mode 100644 index 0000000000..5d63cafbe5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/explain.txt @@ -0,0 +1,382 @@ +== Physical Plan == +* Filter (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(25) CometFilter +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(26) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(29) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [ca_state#25, ca_zip#26, ca_country#27] + +(31) CometFilter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(32) ColumnarToRow [codegen id : 1] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(33) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(37) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(40) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (69) ++- Exchange (68) + +- * HashAggregate (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometProject (48) + : : : : +- CometSortMergeJoin (47) + : : : : :- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- CometSort (46) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (49) + : : +- CometBroadcastExchange (54) + : : +- CometFilter (53) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (52) + : +- ReusedExchange (57) + +- ReusedExchange (61) + + +(43) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] + +(44) CometSort +Input [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44], [ss_ticket_number#43 ASC NULLS FIRST, ss_item_sk#40 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#45, sr_ticket_number#46] + +(46) CometSort +Input [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [sr_item_sk#45, sr_ticket_number#46], [sr_ticket_number#46 ASC NULLS FIRST, sr_item_sk#45 ASC NULLS FIRST] + +(47) CometSortMergeJoin +Left output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Right output [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_ticket_number#43, ss_item_sk#40], [sr_ticket_number#46, sr_item_sk#45], Inner + +(48) CometProject +Input [7]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44, sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44], [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] + +(49) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] + +(50) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] +Right output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_store_sk#42], [s_store_sk#47], Inner, BuildRight + +(51) CometProject +Input [8]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44, s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50], [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] + +(52) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(53) CometFilter +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Condition : isnotnull(i_item_sk#51) + +(54) CometBroadcastExchange +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(55) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] +Right output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_item_sk#40], [i_item_sk#51], Inner, BuildRight + +(56) CometProject +Input [12]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56], [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(57) ReusedExchange [Reuses operator id: 26] +Output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] + +(58) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Right output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_customer_sk#41], [c_customer_sk#57], Inner, BuildRight + +(59) CometProject +Input [14]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60], [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(60) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(61) ReusedExchange [Reuses operator id: 33] +Output [3]: [ca_state#61, ca_zip#62, ca_country#63] + +(62) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#60, s_zip#50] +Right keys [2]: [upper(ca_country#63), ca_zip#62] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 2] +Output [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Input [15]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60, ca_state#61, ca_zip#62, ca_country#63] + +(64) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum#64] +Results [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] + +(65) Exchange +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Arguments: hashpartitioning(c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(66) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#44))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#44))#30,17,2) AS netpaid#66] + +(67) HashAggregate [codegen id : 3] +Input [1]: [netpaid#66] +Keys: [] +Functions [1]: [partial_avg(netpaid#66)] +Aggregate Attributes [2]: [sum#67, count#68] +Results [2]: [sum#69, count#70] + +(68) Exchange +Input [2]: [sum#69, count#70] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 4] +Input [2]: [sum#69, count#70] +Keys: [] +Functions [1]: [avg(netpaid#66)] +Aggregate Attributes [1]: [avg(netpaid#66)#71] +Results [1]: [(0.05 * avg(netpaid#66)#71) AS (0.05 * avg(netpaid))#72] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c1bf41667f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_datafusion/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #11 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #4 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #6 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + CometFilter [c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_state,ca_zip,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9dbf4af839 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/explain.txt @@ -0,0 +1,403 @@ +== Physical Plan == +* Filter (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometScan parquet spark_catalog.default.store_returns (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.store (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometScan parquet spark_catalog.default.item (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometScan parquet spark_catalog.default.customer (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometScan parquet spark_catalog.default.customer_address (30) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometScan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(25) CometFilter +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(26) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(29) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(31) CometFilter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(32) ColumnarToRow [codegen id : 1] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(33) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(37) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(40) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (69) ++- Exchange (68) + +- * HashAggregate (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometProject (48) + : : : : +- CometSortMergeJoin (47) + : : : : :- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- CometSort (46) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (49) + : : +- CometBroadcastExchange (54) + : : +- CometFilter (53) + : : +- CometScan parquet spark_catalog.default.item (52) + : +- ReusedExchange (57) + +- ReusedExchange (61) + + +(43) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] + +(44) CometSort +Input [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44], [ss_ticket_number#43 ASC NULLS FIRST, ss_item_sk#40 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#45, sr_ticket_number#46] + +(46) CometSort +Input [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [sr_item_sk#45, sr_ticket_number#46], [sr_ticket_number#46 ASC NULLS FIRST, sr_item_sk#45 ASC NULLS FIRST] + +(47) CometSortMergeJoin +Left output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Right output [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_ticket_number#43, ss_item_sk#40], [sr_ticket_number#46, sr_item_sk#45], Inner + +(48) CometProject +Input [7]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44, sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44], [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] + +(49) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] + +(50) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] +Right output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_store_sk#42], [s_store_sk#47], Inner, BuildRight + +(51) CometProject +Input [8]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44, s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50], [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] + +(52) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(53) CometFilter +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Condition : isnotnull(i_item_sk#51) + +(54) CometBroadcastExchange +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(55) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] +Right output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_item_sk#40], [i_item_sk#51], Inner, BuildRight + +(56) CometProject +Input [12]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56], [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(57) ReusedExchange [Reuses operator id: 26] +Output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] + +(58) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Right output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_customer_sk#41], [c_customer_sk#57], Inner, BuildRight + +(59) CometProject +Input [14]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60], [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(60) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(61) ReusedExchange [Reuses operator id: 33] +Output [3]: [ca_state#61, ca_zip#62, ca_country#63] + +(62) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#60, s_zip#50] +Right keys [2]: [upper(ca_country#63), ca_zip#62] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 2] +Output [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Input [15]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60, ca_state#61, ca_zip#62, ca_country#63] + +(64) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum#64] +Results [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] + +(65) Exchange +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Arguments: hashpartitioning(c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(66) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#44))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#44))#30,17,2) AS netpaid#66] + +(67) HashAggregate [codegen id : 3] +Input [1]: [netpaid#66] +Keys: [] +Functions [1]: [partial_avg(netpaid#66)] +Aggregate Attributes [2]: [sum#67, count#68] +Results [2]: [sum#69, count#70] + +(68) Exchange +Input [2]: [sum#69, count#70] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 4] +Input [2]: [sum#69, count#70] +Keys: [] +Functions [1]: [avg(netpaid#66)] +Aggregate Attributes [1]: [avg(netpaid#66)#71] +Results [1]: [(0.05 * avg(netpaid#66)#71) AS (0.05 * avg(netpaid))#72] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bd14d82504 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24a.native_iceberg_compat/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #11 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #4 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #6 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + CometFilter [c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_state,ca_zip,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/explain.txt new file mode 100644 index 0000000000..829f96c1fb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/explain.txt @@ -0,0 +1,382 @@ +== Physical Plan == +* Filter (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(25) CometFilter +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(26) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(29) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [ca_state#25, ca_zip#26, ca_country#27] + +(31) CometFilter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(32) ColumnarToRow [codegen id : 1] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(33) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(37) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(40) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (69) ++- Exchange (68) + +- * HashAggregate (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometProject (48) + : : : : +- CometSortMergeJoin (47) + : : : : :- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- CometSort (46) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (49) + : : +- CometBroadcastExchange (54) + : : +- CometFilter (53) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (52) + : +- ReusedExchange (57) + +- ReusedExchange (61) + + +(43) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] + +(44) CometSort +Input [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44], [ss_ticket_number#43 ASC NULLS FIRST, ss_item_sk#40 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#45, sr_ticket_number#46] + +(46) CometSort +Input [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [sr_item_sk#45, sr_ticket_number#46], [sr_ticket_number#46 ASC NULLS FIRST, sr_item_sk#45 ASC NULLS FIRST] + +(47) CometSortMergeJoin +Left output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Right output [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_ticket_number#43, ss_item_sk#40], [sr_ticket_number#46, sr_item_sk#45], Inner + +(48) CometProject +Input [7]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44, sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44], [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] + +(49) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] + +(50) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] +Right output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_store_sk#42], [s_store_sk#47], Inner, BuildRight + +(51) CometProject +Input [8]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44, s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50], [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] + +(52) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(53) CometFilter +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Condition : isnotnull(i_item_sk#51) + +(54) CometBroadcastExchange +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(55) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] +Right output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_item_sk#40], [i_item_sk#51], Inner, BuildRight + +(56) CometProject +Input [12]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56], [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(57) ReusedExchange [Reuses operator id: 26] +Output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] + +(58) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Right output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_customer_sk#41], [c_customer_sk#57], Inner, BuildRight + +(59) CometProject +Input [14]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60], [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(60) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(61) ReusedExchange [Reuses operator id: 33] +Output [3]: [ca_state#61, ca_zip#62, ca_country#63] + +(62) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#60, s_zip#50] +Right keys [2]: [upper(ca_country#63), ca_zip#62] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 2] +Output [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Input [15]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60, ca_state#61, ca_zip#62, ca_country#63] + +(64) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum#64] +Results [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] + +(65) Exchange +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Arguments: hashpartitioning(c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(66) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#44))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#44))#30,17,2) AS netpaid#66] + +(67) HashAggregate [codegen id : 3] +Input [1]: [netpaid#66] +Keys: [] +Functions [1]: [partial_avg(netpaid#66)] +Aggregate Attributes [2]: [sum#67, count#68] +Results [2]: [sum#69, count#70] + +(68) Exchange +Input [2]: [sum#69, count#70] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 4] +Input [2]: [sum#69, count#70] +Keys: [] +Functions [1]: [avg(netpaid#66)] +Aggregate Attributes [1]: [avg(netpaid#66)#71] +Results [1]: [(0.05 * avg(netpaid#66)#71) AS (0.05 * avg(netpaid))#72] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c1bf41667f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_datafusion/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #11 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #4 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #6 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + CometFilter [c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_state,ca_zip,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f27ae40199 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/explain.txt @@ -0,0 +1,403 @@ +== Physical Plan == +* Filter (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometScan parquet spark_catalog.default.store_returns (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.store (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometScan parquet spark_catalog.default.item (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometScan parquet spark_catalog.default.customer (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometScan parquet spark_catalog.default.customer_address (30) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometScan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(25) CometFilter +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(26) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(29) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(31) CometFilter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(32) ColumnarToRow [codegen id : 1] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(33) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(37) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(40) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (69) ++- Exchange (68) + +- * HashAggregate (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometProject (48) + : : : : +- CometSortMergeJoin (47) + : : : : :- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- CometSort (46) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (49) + : : +- CometBroadcastExchange (54) + : : +- CometFilter (53) + : : +- CometScan parquet spark_catalog.default.item (52) + : +- ReusedExchange (57) + +- ReusedExchange (61) + + +(43) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] + +(44) CometSort +Input [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44], [ss_ticket_number#43 ASC NULLS FIRST, ss_item_sk#40 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#45, sr_ticket_number#46] + +(46) CometSort +Input [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [sr_item_sk#45, sr_ticket_number#46], [sr_ticket_number#46 ASC NULLS FIRST, sr_item_sk#45 ASC NULLS FIRST] + +(47) CometSortMergeJoin +Left output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Right output [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_ticket_number#43, ss_item_sk#40], [sr_ticket_number#46, sr_item_sk#45], Inner + +(48) CometProject +Input [7]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44, sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44], [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] + +(49) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] + +(50) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] +Right output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_store_sk#42], [s_store_sk#47], Inner, BuildRight + +(51) CometProject +Input [8]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44, s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50], [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] + +(52) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(53) CometFilter +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Condition : isnotnull(i_item_sk#51) + +(54) CometBroadcastExchange +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(55) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] +Right output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_item_sk#40], [i_item_sk#51], Inner, BuildRight + +(56) CometProject +Input [12]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56], [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(57) ReusedExchange [Reuses operator id: 26] +Output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] + +(58) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Right output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_customer_sk#41], [c_customer_sk#57], Inner, BuildRight + +(59) CometProject +Input [14]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60], [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(60) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(61) ReusedExchange [Reuses operator id: 33] +Output [3]: [ca_state#61, ca_zip#62, ca_country#63] + +(62) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#60, s_zip#50] +Right keys [2]: [upper(ca_country#63), ca_zip#62] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 2] +Output [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Input [15]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60, ca_state#61, ca_zip#62, ca_country#63] + +(64) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum#64] +Results [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] + +(65) Exchange +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Arguments: hashpartitioning(c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(66) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#44))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#44))#30,17,2) AS netpaid#66] + +(67) HashAggregate [codegen id : 3] +Input [1]: [netpaid#66] +Keys: [] +Functions [1]: [partial_avg(netpaid#66)] +Aggregate Attributes [2]: [sum#67, count#68] +Results [2]: [sum#69, count#70] + +(68) Exchange +Input [2]: [sum#69, count#70] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 4] +Input [2]: [sum#69, count#70] +Keys: [] +Functions [1]: [avg(netpaid#66)] +Aggregate Attributes [1]: [avg(netpaid#66)#71] +Results [1]: [(0.05 * avg(netpaid#66)#71) AS (0.05 * avg(netpaid))#72] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bd14d82504 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q24b.native_iceberg_compat/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #11 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #4 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #6 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + CometFilter [c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_state,ca_zip,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/explain.txt new file mode 100644 index 0000000000..ec5fe33185 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +* ColumnarToRow (40) ++- CometTakeOrderedAndProject (39) + +- CometHashAggregate (38) + +- CometExchange (37) + +- CometHashAggregate (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : +- ReusedExchange (25) + : +- CometBroadcastExchange (30) + : +- CometFilter (29) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (28) + +- ReusedExchange (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] + +(8) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16, d_year#17, d_moy#18] + +(14) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 4)) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19, d_year#20, d_moy#21] + +(20) CometFilter +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 4)) AND (d_moy#21 <= 10)) AND (d_year#20 = 2001)) AND isnotnull(d_date_sk#19)) + +(21) CometProject +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#19] +Arguments: [sr_returned_date_sk#11], [d_date_sk#19], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#19] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] + +(25) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#22] + +(26) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#15], [d_date_sk#22], Inner, BuildRight + +(27) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#22] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] + +(28) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(29) CometFilter +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Condition : isnotnull(s_store_sk#23) + +(30) CometBroadcastExchange +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(31) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] +Right output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [ss_store_sk#3], [s_store_sk#23], Inner, BuildRight + +(32) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25], [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] + +(33) ReusedExchange [Reuses operator id: 30] +Output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] + +(34) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] +Right output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [ss_item_sk#1], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28], [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] + +(36) CometHashAggregate +Input [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#10)), partial_sum(UnscaledValue(cs_net_profit#14))] + +(37) CometExchange +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#29, sum#30, sum#31] +Arguments: hashpartitioning(i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(38) CometHashAggregate +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#29, sum#30, sum#31] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#10)), sum(UnscaledValue(cs_net_profit#14))] + +(39) CometTakeOrderedAndProject +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#27 ASC NULLS FIRST,i_item_desc#28 ASC NULLS FIRST,s_store_id#24 ASC NULLS FIRST,s_store_name#25 ASC NULLS FIRST], output=[i_item_id#27,i_item_desc#28,s_store_id#24,s_store_name#25,store_sales_profit#32,store_returns_loss#33,catalog_sales_profit#34]), [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34], 100, [i_item_id#27 ASC NULLS FIRST, i_item_desc#28 ASC NULLS FIRST, s_store_id#24 ASC NULLS FIRST, s_store_name#25 ASC NULLS FIRST], [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34] + +(40) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0074eafcf3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit))] + CometExchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum,ss_net_profit,sr_net_loss,cs_net_profit] + CometProject [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_sk,s_store_id,s_store_name] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #6 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name] + ReusedExchange [i_item_sk,i_item_id,i_item_desc] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f27dc81e1d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/explain.txt @@ -0,0 +1,244 @@ +== Physical Plan == +* ColumnarToRow (42) ++- CometTakeOrderedAndProject (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.store_returns (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometScan parquet spark_catalog.default.date_dim (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometScan parquet spark_catalog.default.date_dim (19) + : : +- ReusedExchange (25) + : +- CometBroadcastExchange (30) + : +- CometFilter (29) + : +- CometScan parquet spark_catalog.default.store (28) + +- CometBroadcastExchange (35) + +- CometFilter (34) + +- CometScan parquet spark_catalog.default.item (33) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] + +(8) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 4)) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] + +(19) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 4)) AND (d_moy#21 <= 10)) AND (d_year#20 = 2001)) AND isnotnull(d_date_sk#19)) + +(21) CometProject +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#19] +Arguments: [sr_returned_date_sk#11], [d_date_sk#19], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#19] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] + +(25) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#22] + +(26) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#15], [d_date_sk#22], Inner, BuildRight + +(27) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#22] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] + +(28) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(29) CometFilter +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Condition : isnotnull(s_store_sk#23) + +(30) CometBroadcastExchange +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(31) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] +Right output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [ss_store_sk#3], [s_store_sk#23], Inner, BuildRight + +(32) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25], [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] + +(33) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Condition : isnotnull(i_item_sk#26) + +(35) CometBroadcastExchange +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [i_item_sk#26, i_item_id#27, i_item_desc#28] + +(36) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] +Right output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [ss_item_sk#1], [i_item_sk#26], Inner, BuildRight + +(37) CometProject +Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28], [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] + +(38) CometHashAggregate +Input [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#10)), partial_sum(UnscaledValue(cs_net_profit#14))] + +(39) CometExchange +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#29, sum#30, sum#31] +Arguments: hashpartitioning(i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(40) CometHashAggregate +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#29, sum#30, sum#31] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#10)), sum(UnscaledValue(cs_net_profit#14))] + +(41) CometTakeOrderedAndProject +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#27 ASC NULLS FIRST,i_item_desc#28 ASC NULLS FIRST,s_store_id#24 ASC NULLS FIRST,s_store_name#25 ASC NULLS FIRST], output=[i_item_id#27,i_item_desc#28,s_store_id#24,s_store_name#25,store_sales_profit#32,store_returns_loss#33,catalog_sales_profit#34]), [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34], 100, [i_item_id#27 ASC NULLS FIRST, i_item_desc#28 ASC NULLS FIRST, s_store_id#24 ASC NULLS FIRST, s_store_name#25 ASC NULLS FIRST], [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34] + +(42) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#32, store_returns_loss#33, catalog_sales_profit#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..5664bea15a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q25.native_iceberg_compat/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit))] + CometExchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum,ss_net_profit,sr_net_loss,cs_net_profit] + CometProject [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_sk,s_store_id,s_store_name] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #6 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc] #7 + CometFilter [i_item_sk,i_item_id,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/explain.txt new file mode 100644 index 0000000000..a203045086 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/explain.txt @@ -0,0 +1,158 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometHashAggregate (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometProject (25) + +- CometBroadcastHashJoin (24) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + +- CometBroadcastExchange (23) + +- CometProject (22) + +- CometFilter (21) + +- CometNativeScan: `spark_catalog`.`default`.`promotion` (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Arguments: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_bill_cdemo_sk#1) AND isnotnull(cs_item_sk#2)) AND isnotnull(cs_promo_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [cs_bill_cdemo_sk#1], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#9] +Arguments: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8], [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [cs_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#13] +Arguments: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7], [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(16) CometFilter +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(17) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(18) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] +Right output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [cs_item_sk#2], [i_item_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#15, i_item_id#16] +Arguments: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16], [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(21) CometFilter +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(22) CometProject +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17], [p_promo_sk#17] + +(23) CometBroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: [p_promo_sk#17] + +(24) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Right output [1]: [p_promo_sk#17] +Arguments: [cs_promo_sk#3], [p_promo_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16, p_promo_sk#17] +Arguments: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(26) CometHashAggregate +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] + +(27) CometExchange +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] + +(29) CometTakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#16 ASC NULLS FIRST], output=[i_item_id#16,agg1#28,agg2#29,agg3#30,agg4#31]), [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31], 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/simplified.txt new file mode 100644 index 0000000000..09d0fd2812 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(cs_quantity),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price))] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] + CometProject [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + CometBroadcastHashJoin [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id,p_promo_sk] + CometProject [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk,d_date_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk,cd_demo_sk] + CometFilter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #4 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [p_promo_sk] #5 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_email,p_channel_event] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..16da3274bc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/explain.txt @@ -0,0 +1,174 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometHashAggregate (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometProject (25) + +- CometBroadcastHashJoin (24) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.customer_demographics (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.date_dim (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.item (15) + +- CometBroadcastExchange (23) + +- CometProject (22) + +- CometFilter (21) + +- CometScan parquet spark_catalog.default.promotion (20) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_bill_cdemo_sk#1) AND isnotnull(cs_item_sk#2)) AND isnotnull(cs_promo_sk#3)) + +(3) CometScan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [cs_bill_cdemo_sk#1], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#9] +Arguments: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8], [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [cs_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#13] +Arguments: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7], [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] + +(15) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(17) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(18) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] +Right output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [cs_item_sk#2], [i_item_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#15, i_item_id#16] +Arguments: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16], [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(20) CometScan parquet spark_catalog.default.promotion +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(21) CometFilter +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(22) CometProject +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17], [p_promo_sk#17] + +(23) CometBroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: [p_promo_sk#17] + +(24) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Right output [1]: [p_promo_sk#17] +Arguments: [cs_promo_sk#3], [p_promo_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16, p_promo_sk#17] +Arguments: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(26) CometHashAggregate +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] + +(27) CometExchange +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] + +(29) CometTakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#16 ASC NULLS FIRST], output=[i_item_id#16,agg1#28,agg2#29,agg3#30,agg4#31]), [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31], 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..fd4c659c73 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q26.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(cs_quantity),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price))] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] + CometProject [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + CometBroadcastHashJoin [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id,p_promo_sk] + CometProject [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk,d_date_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk,cd_demo_sk] + CometFilter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #4 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometBroadcastExchange [p_promo_sk] #5 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_email,p_channel_event] + CometScan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/explain.txt new file mode 100644 index 0000000000..05a0abf815 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/explain.txt @@ -0,0 +1,158 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometHashAggregate (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometExpand (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + +- CometBroadcastExchange (22) + +- CometFilter (21) + +- CometNativeScan: `spark_catalog`.`default`.`item` (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(16) CometFilter +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [s_store_sk#15, s_state#16] +Arguments: [ss_store_sk#3], [s_store_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] +Arguments: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16], [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(21) CometFilter +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Right output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] +Arguments: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] + +(25) CometExpand +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] + +(26) CometHashAggregate +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] + +(27) CometExchange +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] + +(29) CometTakeOrderedAndProject +Input [7]: [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#19 ASC NULLS FIRST,s_state#20 ASC NULLS FIRST], output=[i_item_id#19,s_state#20,g_state#30,agg1#31,agg2#32,agg3#33,agg4#34]), [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34], 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34] + +(30) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d1c71c8dc4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price))] + CometExchange [i_item_id,s_state,spark_grouping_id] #1 + CometHashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + CometExpand [i_item_id,s_state] [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state,spark_grouping_id] + CometProject [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_state] #4 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f1882e4ee8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/explain.txt @@ -0,0 +1,174 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometHashAggregate (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometExpand (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.customer_demographics (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.date_dim (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.store (15) + +- CometBroadcastExchange (22) + +- CometFilter (21) + +- CometScan parquet spark_catalog.default.item (20) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [s_store_sk#15, s_state#16] +Arguments: [ss_store_sk#3], [s_store_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] +Arguments: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16], [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] + +(20) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(21) CometFilter +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Right output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] +Arguments: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] + +(25) CometExpand +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] + +(26) CometHashAggregate +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] + +(27) CometExchange +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] + +(29) CometTakeOrderedAndProject +Input [7]: [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#19 ASC NULLS FIRST,s_state#20 ASC NULLS FIRST], output=[i_item_id#19,s_state#20,g_state#30,agg1#31,agg2#32,agg3#33,agg4#34]), [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34], 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34] + +(30) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#19, s_state#20, g_state#30, agg1#31, agg2#32, agg3#33, agg4#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4ed32e8d26 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q27.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price))] + CometExchange [i_item_id,s_state,spark_grouping_id] #1 + CometHashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + CometExpand [i_item_id,s_state] [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state,spark_grouping_id] + CometProject [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_state] #4 + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/explain.txt new file mode 100644 index 0000000000..e537028de2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/explain.txt @@ -0,0 +1,407 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (70) +:- * BroadcastNestedLoopJoin Inner BuildRight (58) +: :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- * ColumnarToRow (6) +: : : : : +- CometExchange (5) +: : : : : +- CometHashAggregate (4) +: : : : : +- CometProject (3) +: : : : : +- CometFilter (2) +: : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- * ColumnarToRow (16) +: : : : +- CometExchange (15) +: : : : +- CometHashAggregate (14) +: : : : +- CometProject (13) +: : : : +- CometFilter (12) +: : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- * ColumnarToRow (28) +: : : +- CometExchange (27) +: : : +- CometHashAggregate (26) +: : : +- CometProject (25) +: : : +- CometFilter (24) +: : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- * ColumnarToRow (40) +: : +- CometExchange (39) +: : +- CometHashAggregate (38) +: : +- CometProject (37) +: : +- CometFilter (36) +: : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- * ColumnarToRow (52) +: +- CometExchange (51) +: +- CometHashAggregate (50) +: +- CometProject (49) +: +- CometFilter (48) +: +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- * ColumnarToRow (64) + +- CometExchange (63) + +- CometHashAggregate (62) + +- CometProject (61) + +- CometFilter (60) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (59) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Arguments: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) + +(3) CometProject +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Arguments: [ss_list_price#3], [ss_list_price#3] + +(4) CometHashAggregate +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] + +(5) CometExchange +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] + +(7) HashAggregate [codegen id : 1] +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10] +Results [4]: [ss_list_price#3, sum#6, count#7, count#8] + +(8) HashAggregate [codegen id : 1] +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10, count(ss_list_price#3)#11] +Results [4]: [sum#6, count#7, count#8, count#12] + +(9) Exchange +Input [4]: [sum#6, count#7, count#8, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) HashAggregate [codegen id : 12] +Input [4]: [sum#6, count#7, count#8, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#9 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#10 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] + +(11) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Arguments: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] + +(12) CometFilter +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) + +(13) CometProject +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Arguments: [ss_list_price#18], [ss_list_price#18] + +(14) CometHashAggregate +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] + +(15) CometExchange +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(16) ColumnarToRow [codegen id : 2] +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] + +(17) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25] +Results [4]: [ss_list_price#18, sum#21, count#22, count#23] + +(18) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25, count(ss_list_price#18)#26] +Results [4]: [sum#21, count#22, count#23, count#27] + +(19) Exchange +Input [4]: [sum#21, count#22, count#23, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(20) HashAggregate [codegen id : 3] +Input [4]: [sum#21, count#22, count#23, count#27] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#24 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#25 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] + +(21) BroadcastExchange +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(22) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(23) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Arguments: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] + +(24) CometFilter +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) + +(25) CometProject +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Arguments: [ss_list_price#33], [ss_list_price#33] + +(26) CometHashAggregate +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] + +(27) CometExchange +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(28) ColumnarToRow [codegen id : 4] +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] + +(29) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40] +Results [4]: [ss_list_price#33, sum#36, count#37, count#38] + +(30) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40, count(ss_list_price#33)#41] +Results [4]: [sum#36, count#37, count#38, count#42] + +(31) Exchange +Input [4]: [sum#36, count#37, count#38, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(32) HashAggregate [codegen id : 5] +Input [4]: [sum#36, count#37, count#38, count#42] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#39 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#40 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] + +(33) BroadcastExchange +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] + +(34) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(35) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Arguments: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] + +(36) CometFilter +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) + +(37) CometProject +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Arguments: [ss_list_price#48], [ss_list_price#48] + +(38) CometHashAggregate +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] + +(39) CometExchange +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(40) ColumnarToRow [codegen id : 6] +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] + +(41) HashAggregate [codegen id : 6] +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55] +Results [4]: [ss_list_price#48, sum#51, count#52, count#53] + +(42) HashAggregate [codegen id : 6] +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55, count(ss_list_price#48)#56] +Results [4]: [sum#51, count#52, count#53, count#57] + +(43) Exchange +Input [4]: [sum#51, count#52, count#53, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(44) HashAggregate [codegen id : 7] +Input [4]: [sum#51, count#52, count#53, count#57] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#54 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#55 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] + +(45) BroadcastExchange +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(46) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(47) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Arguments: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] + +(48) CometFilter +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) + +(49) CometProject +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Arguments: [ss_list_price#63], [ss_list_price#63] + +(50) CometHashAggregate +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] + +(51) CometExchange +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(52) ColumnarToRow [codegen id : 8] +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] + +(53) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70] +Results [4]: [ss_list_price#63, sum#66, count#67, count#68] + +(54) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70, count(ss_list_price#63)#71] +Results [4]: [sum#66, count#67, count#68, count#72] + +(55) Exchange +Input [4]: [sum#66, count#67, count#68, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(56) HashAggregate [codegen id : 9] +Input [4]: [sum#66, count#67, count#68, count#72] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#69 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#70 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] + +(57) BroadcastExchange +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] + +(58) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(59) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Arguments: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] + +(60) CometFilter +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) + +(61) CometProject +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Arguments: [ss_list_price#78], [ss_list_price#78] + +(62) CometHashAggregate +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] + +(63) CometExchange +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=15] + +(64) ColumnarToRow [codegen id : 10] +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] + +(65) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85] +Results [4]: [ss_list_price#78, sum#81, count#82, count#83] + +(66) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85, count(ss_list_price#78)#86] +Results [4]: [sum#81, count#82, count#83, count#87] + +(67) Exchange +Input [4]: [sum#81, count#82, count#83, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(68) HashAggregate [codegen id : 11] +Input [4]: [sum#81, count#82, count#83, count#87] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#84 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#85 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] + +(69) BroadcastExchange +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] + +(70) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/simplified.txt new file mode 100644 index 0000000000..03812cf5b6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_datafusion/simplified.txt @@ -0,0 +1,99 @@ +WholeStageCodegen (12) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #2 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + InputAdapter + Exchange #4 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #5 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + InputAdapter + Exchange #7 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #8 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + InputAdapter + Exchange #10 + WholeStageCodegen (6) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #11 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + InputAdapter + Exchange #13 + WholeStageCodegen (8) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #14 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (11) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #17 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8f0671c8f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/explain.txt @@ -0,0 +1,425 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (70) +:- * BroadcastNestedLoopJoin Inner BuildRight (58) +: :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- * ColumnarToRow (6) +: : : : : +- CometExchange (5) +: : : : : +- CometHashAggregate (4) +: : : : : +- CometProject (3) +: : : : : +- CometFilter (2) +: : : : : +- CometScan parquet spark_catalog.default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- * ColumnarToRow (16) +: : : : +- CometExchange (15) +: : : : +- CometHashAggregate (14) +: : : : +- CometProject (13) +: : : : +- CometFilter (12) +: : : : +- CometScan parquet spark_catalog.default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- * ColumnarToRow (28) +: : : +- CometExchange (27) +: : : +- CometHashAggregate (26) +: : : +- CometProject (25) +: : : +- CometFilter (24) +: : : +- CometScan parquet spark_catalog.default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- * ColumnarToRow (40) +: : +- CometExchange (39) +: : +- CometHashAggregate (38) +: : +- CometProject (37) +: : +- CometFilter (36) +: : +- CometScan parquet spark_catalog.default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- * ColumnarToRow (52) +: +- CometExchange (51) +: +- CometHashAggregate (50) +: +- CometProject (49) +: +- CometFilter (48) +: +- CometScan parquet spark_catalog.default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- * ColumnarToRow (64) + +- CometExchange (63) + +- CometHashAggregate (62) + +- CometProject (61) + +- CometFilter (60) + +- CometScan parquet spark_catalog.default.store_sales (59) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) + +(3) CometProject +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Arguments: [ss_list_price#3], [ss_list_price#3] + +(4) CometHashAggregate +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] + +(5) CometExchange +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] + +(7) HashAggregate [codegen id : 1] +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10] +Results [4]: [ss_list_price#3, sum#6, count#7, count#8] + +(8) HashAggregate [codegen id : 1] +Input [4]: [ss_list_price#3, sum#6, count#7, count#8] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10, count(ss_list_price#3)#11] +Results [4]: [sum#6, count#7, count#8, count#12] + +(9) Exchange +Input [4]: [sum#6, count#7, count#8, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) HashAggregate [codegen id : 12] +Input [4]: [sum#6, count#7, count#8, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#9, count(ss_list_price#3)#10, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#9 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#10 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] + +(11) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] +ReadSchema: struct + +(12) CometFilter +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) + +(13) CometProject +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Arguments: [ss_list_price#18], [ss_list_price#18] + +(14) CometHashAggregate +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] + +(15) CometExchange +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(16) ColumnarToRow [codegen id : 2] +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] + +(17) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25] +Results [4]: [ss_list_price#18, sum#21, count#22, count#23] + +(18) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#18, sum#21, count#22, count#23] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25, count(ss_list_price#18)#26] +Results [4]: [sum#21, count#22, count#23, count#27] + +(19) Exchange +Input [4]: [sum#21, count#22, count#23, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(20) HashAggregate [codegen id : 3] +Input [4]: [sum#21, count#22, count#23, count#27] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#24, count(ss_list_price#18)#25, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#24 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#25 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] + +(21) BroadcastExchange +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(22) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(23) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] +ReadSchema: struct + +(24) CometFilter +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) + +(25) CometProject +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Arguments: [ss_list_price#33], [ss_list_price#33] + +(26) CometHashAggregate +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] + +(27) CometExchange +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(28) ColumnarToRow [codegen id : 4] +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] + +(29) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40] +Results [4]: [ss_list_price#33, sum#36, count#37, count#38] + +(30) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#33, sum#36, count#37, count#38] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40, count(ss_list_price#33)#41] +Results [4]: [sum#36, count#37, count#38, count#42] + +(31) Exchange +Input [4]: [sum#36, count#37, count#38, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(32) HashAggregate [codegen id : 5] +Input [4]: [sum#36, count#37, count#38, count#42] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#39, count(ss_list_price#33)#40, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#39 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#40 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] + +(33) BroadcastExchange +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] + +(34) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(35) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] +ReadSchema: struct + +(36) CometFilter +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) + +(37) CometProject +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Arguments: [ss_list_price#48], [ss_list_price#48] + +(38) CometHashAggregate +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] + +(39) CometExchange +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(40) ColumnarToRow [codegen id : 6] +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] + +(41) HashAggregate [codegen id : 6] +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55] +Results [4]: [ss_list_price#48, sum#51, count#52, count#53] + +(42) HashAggregate [codegen id : 6] +Input [4]: [ss_list_price#48, sum#51, count#52, count#53] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55, count(ss_list_price#48)#56] +Results [4]: [sum#51, count#52, count#53, count#57] + +(43) Exchange +Input [4]: [sum#51, count#52, count#53, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(44) HashAggregate [codegen id : 7] +Input [4]: [sum#51, count#52, count#53, count#57] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#54, count(ss_list_price#48)#55, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#54 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#55 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] + +(45) BroadcastExchange +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(46) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(47) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] +ReadSchema: struct + +(48) CometFilter +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) + +(49) CometProject +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Arguments: [ss_list_price#63], [ss_list_price#63] + +(50) CometHashAggregate +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] + +(51) CometExchange +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(52) ColumnarToRow [codegen id : 8] +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] + +(53) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70] +Results [4]: [ss_list_price#63, sum#66, count#67, count#68] + +(54) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#63, sum#66, count#67, count#68] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70, count(ss_list_price#63)#71] +Results [4]: [sum#66, count#67, count#68, count#72] + +(55) Exchange +Input [4]: [sum#66, count#67, count#68, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(56) HashAggregate [codegen id : 9] +Input [4]: [sum#66, count#67, count#68, count#72] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#69, count(ss_list_price#63)#70, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#69 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#70 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] + +(57) BroadcastExchange +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] + +(58) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + +(59) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] +ReadSchema: struct + +(60) CometFilter +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) + +(61) CometProject +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Arguments: [ss_list_price#78], [ss_list_price#78] + +(62) CometHashAggregate +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] + +(63) CometExchange +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=15] + +(64) ColumnarToRow [codegen id : 10] +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] + +(65) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85] +Results [4]: [ss_list_price#78, sum#81, count#82, count#83] + +(66) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#78, sum#81, count#82, count#83] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85, count(ss_list_price#78)#86] +Results [4]: [sum#81, count#82, count#83, count#87] + +(67) Exchange +Input [4]: [sum#81, count#82, count#83, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(68) HashAggregate [codegen id : 11] +Input [4]: [sum#81, count#82, count#83, count#87] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#84, count(ss_list_price#78)#85, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#84 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#85 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] + +(69) BroadcastExchange +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] + +(70) BroadcastNestedLoopJoin [codegen id : 12] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4a547c4e04 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q28.native_iceberg_compat/simplified.txt @@ -0,0 +1,99 @@ +WholeStageCodegen (12) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #2 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + InputAdapter + Exchange #4 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #5 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + InputAdapter + Exchange #7 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #8 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + InputAdapter + Exchange #10 + WholeStageCodegen (6) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #11 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + InputAdapter + Exchange #13 + WholeStageCodegen (8) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #14 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (11) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometExchange [ss_list_price] #17 + CometHashAggregate [ss_list_price,sum,count,count] + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/explain.txt new file mode 100644 index 0000000000..bfe4cc564b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/explain.txt @@ -0,0 +1,225 @@ +== Physical Plan == +* ColumnarToRow (43) ++- CometTakeOrderedAndProject (42) + +- CometHashAggregate (41) + +- CometExchange (40) + +- CometHashAggregate (39) + +- CometProject (38) + +- CometBroadcastHashJoin (37) + :- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : +- CometBroadcastExchange (28) + : : +- CometProject (27) + : : +- CometFilter (26) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (25) + : +- CometBroadcastExchange (33) + : +- CometFilter (32) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (31) + +- ReusedExchange (36) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16, d_year#17, d_moy#18] + +(14) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 9)) AND (d_year#17 = 1999)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19, d_year#20, d_moy#21] + +(20) CometFilter +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 9)) AND (d_moy#21 <= 12)) AND (d_year#20 = 1999)) AND isnotnull(d_date_sk#19)) + +(21) CometProject +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#19] +Arguments: [sr_returned_date_sk#11], [d_date_sk#19], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#19] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] + +(25) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22, d_year#23] + +(26) CometFilter +Input [2]: [d_date_sk#22, d_year#23] +Condition : (d_year#23 IN (1999,2000,2001) AND isnotnull(d_date_sk#22)) + +(27) CometProject +Input [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22], [d_date_sk#22] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: [d_date_sk#22] + +(29) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#15], [d_date_sk#22], Inner, BuildRight + +(30) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#22] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] + +(31) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(32) CometFilter +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Condition : isnotnull(s_store_sk#24) + +(33) CometBroadcastExchange +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(34) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Right output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [ss_store_sk#3], [s_store_sk#24], Inner, BuildRight + +(35) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26], [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] + +(36) ReusedExchange [Reuses operator id: 33] +Output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] + +(37) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] +Right output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [ss_item_sk#1], [i_item_sk#27], Inner, BuildRight + +(38) CometProject +Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29], [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] + +(39) CometHashAggregate +Input [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#10), partial_sum(cs_quantity#14)] + +(40) CometExchange +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(41) CometHashAggregate +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#30, sum#31, sum#32] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#10), sum(cs_quantity#14)] + +(42) CometTakeOrderedAndProject +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#28 ASC NULLS FIRST,i_item_desc#29 ASC NULLS FIRST,s_store_id#25 ASC NULLS FIRST,s_store_name#26 ASC NULLS FIRST], output=[i_item_id#28,i_item_desc#29,s_store_id#25,s_store_name#26,store_sales_quantity#33,store_returns_quantity#34,catalog_sales_quantity#35]), [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35], 100, [i_item_id#28 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, s_store_name#26 ASC NULLS FIRST], [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35] + +(43) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/simplified.txt new file mode 100644 index 0000000000..33adc3854b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_datafusion/simplified.txt @@ -0,0 +1,45 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum,sum(ss_quantity),sum(sr_return_quantity),sum(cs_quantity)] + CometExchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum,ss_quantity,sr_return_quantity,cs_quantity] + CometProject [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_sk,s_store_id,s_store_name] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #7 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name] + ReusedExchange [i_item_sk,i_item_id,i_item_desc] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2c7d749dc4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/explain.txt @@ -0,0 +1,263 @@ +== Physical Plan == +* ColumnarToRow (45) ++- CometTakeOrderedAndProject (44) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.store_returns (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometScan parquet spark_catalog.default.date_dim (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometScan parquet spark_catalog.default.date_dim (19) + : : +- CometBroadcastExchange (28) + : : +- CometProject (27) + : : +- CometFilter (26) + : : +- CometScan parquet spark_catalog.default.date_dim (25) + : +- CometBroadcastExchange (33) + : +- CometFilter (32) + : +- CometScan parquet spark_catalog.default.store (31) + +- CometBroadcastExchange (38) + +- CometFilter (37) + +- CometScan parquet spark_catalog.default.item (36) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 9)) AND (d_year#17 = 1999)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(19) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 9)) AND (d_moy#21 <= 12)) AND (d_year#20 = 1999)) AND isnotnull(d_date_sk#19)) + +(21) CometProject +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#19] +Arguments: [sr_returned_date_sk#11], [d_date_sk#19], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#19] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] + +(25) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(26) CometFilter +Input [2]: [d_date_sk#22, d_year#23] +Condition : (d_year#23 IN (1999,2000,2001) AND isnotnull(d_date_sk#22)) + +(27) CometProject +Input [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22], [d_date_sk#22] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: [d_date_sk#22] + +(29) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#15], [d_date_sk#22], Inner, BuildRight + +(30) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#22] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] + +(31) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(32) CometFilter +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Condition : isnotnull(s_store_sk#24) + +(33) CometBroadcastExchange +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(34) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Right output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [ss_store_sk#3], [s_store_sk#24], Inner, BuildRight + +(35) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26], [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] + +(36) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(37) CometFilter +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Condition : isnotnull(i_item_sk#27) + +(38) CometBroadcastExchange +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [i_item_sk#27, i_item_id#28, i_item_desc#29] + +(39) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] +Right output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [ss_item_sk#1], [i_item_sk#27], Inner, BuildRight + +(40) CometProject +Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29], [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] + +(41) CometHashAggregate +Input [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#10), partial_sum(cs_quantity#14)] + +(42) CometExchange +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(43) CometHashAggregate +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#30, sum#31, sum#32] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#10), sum(cs_quantity#14)] + +(44) CometTakeOrderedAndProject +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#28 ASC NULLS FIRST,i_item_desc#29 ASC NULLS FIRST,s_store_id#25 ASC NULLS FIRST,s_store_name#26 ASC NULLS FIRST], output=[i_item_id#28,i_item_desc#29,s_store_id#25,s_store_name#26,store_sales_quantity#33,store_returns_quantity#34,catalog_sales_quantity#35]), [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35], 100, [i_item_id#28 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, s_store_name#26 ASC NULLS FIRST], [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35] + +(45) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#33, store_returns_quantity#34, catalog_sales_quantity#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d407cf0bc7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q29.native_iceberg_compat/simplified.txt @@ -0,0 +1,47 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum,sum(ss_quantity),sum(sr_return_quantity),sum(cs_quantity)] + CometExchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + CometHashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum,ss_quantity,sr_return_quantity,cs_quantity] + CometProject [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_sk,s_store_id,s_store_name] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #7 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc] #8 + CometFilter [i_item_sk,i_item_id,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/explain.txt new file mode 100644 index 0000000000..527a1e97da --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 128)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) CometHashAggregate +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [4]: [d_year#2, brand_id#12, brand#13, sum_agg#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[d_year#2 ASC NULLS FIRST,sum_agg#14 DESC NULLS LAST,brand_id#12 ASC NULLS FIRST], output=[d_year#2,brand_id#12,brand#13,sum_agg#14]), [d_year#2, brand_id#12, brand#13, sum_agg#14], 100, [d_year#2 ASC NULLS FIRST, sum_agg#14 DESC NULLS LAST, brand_id#12 ASC NULLS FIRST], [d_year#2, brand_id#12, brand#13, sum_agg#14] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, brand_id#12, brand#13, sum_agg#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6828a328ea --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [d_year,brand_id,brand,sum_agg] + CometHashAggregate [d_year,brand_id,brand,sum_agg,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [d_year,i_brand,i_brand_id] #1 + CometHashAggregate [d_year,i_brand,i_brand_id,sum,ss_ext_sales_price] + CometProject [d_year,ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..812c13b8e6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/explain.txt @@ -0,0 +1,111 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.date_dim (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometScan parquet spark_catalog.default.store_sales (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometScan parquet spark_catalog.default.item (9) + + +(1) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 128)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) CometHashAggregate +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [4]: [d_year#2, brand_id#12, brand#13, sum_agg#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[d_year#2 ASC NULLS FIRST,sum_agg#14 DESC NULLS LAST,brand_id#12 ASC NULLS FIRST], output=[d_year#2,brand_id#12,brand#13,sum_agg#14]), [d_year#2, brand_id#12, brand#13, sum_agg#14], 100, [d_year#2 ASC NULLS FIRST, sum_agg#14 DESC NULLS LAST, brand_id#12 ASC NULLS FIRST], [d_year#2, brand_id#12, brand#13, sum_agg#14] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, brand_id#12, brand#13, sum_agg#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f82fd24bdd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q3.native_iceberg_compat/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [d_year,brand_id,brand,sum_agg] + CometHashAggregate [d_year,brand_id,brand,sum_agg,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [d_year,i_brand,i_brand_id] #1 + CometHashAggregate [d_year,i_brand,i_brand_id,sum,ss_ext_sales_price] + CometProject [d_year,ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/explain.txt new file mode 100644 index 0000000000..c5873bebbd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/explain.txt @@ -0,0 +1,253 @@ +== Physical Plan == +* ColumnarToRow (48) ++- CometTakeOrderedAndProject (47) + +- CometProject (46) + +- CometBroadcastHashJoin (45) + :- CometProject (40) + : +- CometBroadcastHashJoin (39) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometFilter (17) + : : : +- CometHashAggregate (16) + : : : +- CometExchange (15) + : : : +- CometHashAggregate (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : : +- CometBroadcastExchange (33) + : : +- CometFilter (32) + : : +- CometHashAggregate (31) + : : +- CometExchange (30) + : : +- CometHashAggregate (29) + : : +- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometFilter (19) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (18) + : : : +- ReusedExchange (20) + : : +- ReusedExchange (23) + : +- CometBroadcastExchange (38) + : +- CometFilter (37) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (36) + +- CometBroadcastExchange (44) + +- CometProject (43) + +- CometFilter (42) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (41) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Arguments: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] + +(2) CometFilter +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : (isnotnull(wr_returning_addr_sk#2) AND isnotnull(wr_returning_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5, d_year#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [wr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] +Arguments: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3], [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(10) CometFilter +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(12) CometBroadcastHashJoin +Left output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Right output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [wr_returning_addr_sk#2], [ca_address_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] +Arguments: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8], [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] + +(14) CometHashAggregate +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] + +(15) CometExchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#9] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#9] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] + +(17) CometFilter +Input [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(18) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [4]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] +Arguments: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] + +(19) CometFilter +Input [4]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] +Condition : isnotnull(wr_returning_addr_sk#14) + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#17] + +(21) CometBroadcastHashJoin +Left output [4]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] +Right output [1]: [d_date_sk#17] +Arguments: [wr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + +(22) CometProject +Input [5]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16, d_date_sk#17] +Arguments: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15], [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15] + +(23) ReusedExchange [Reuses operator id: 11] +Output [2]: [ca_address_sk#18, ca_state#19] + +(24) CometBroadcastHashJoin +Left output [3]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15] +Right output [2]: [ca_address_sk#18, ca_state#19] +Arguments: [wr_returning_addr_sk#14], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [5]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, ca_address_sk#18, ca_state#19] +Arguments: [wr_returning_customer_sk#13, wr_return_amt#15, ca_state#19], [wr_returning_customer_sk#13, wr_return_amt#15, ca_state#19] + +(26) CometHashAggregate +Input [3]: [wr_returning_customer_sk#13, wr_return_amt#15, ca_state#19] +Keys [2]: [wr_returning_customer_sk#13, ca_state#19] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#15))] + +(27) CometExchange +Input [3]: [wr_returning_customer_sk#13, ca_state#19, sum#20] +Arguments: hashpartitioning(wr_returning_customer_sk#13, ca_state#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [3]: [wr_returning_customer_sk#13, ca_state#19, sum#20] +Keys [2]: [wr_returning_customer_sk#13, ca_state#19] +Functions [1]: [sum(UnscaledValue(wr_return_amt#15))] + +(29) CometHashAggregate +Input [2]: [ctr_state#21, ctr_total_return#22] +Keys [1]: [ctr_state#21] +Functions [1]: [partial_avg(ctr_total_return#22)] + +(30) CometExchange +Input [3]: [ctr_state#21, sum#23, count#24] +Arguments: hashpartitioning(ctr_state#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(31) CometHashAggregate +Input [3]: [ctr_state#21, sum#23, count#24] +Keys [1]: [ctr_state#21] +Functions [1]: [avg(ctr_total_return#22)] + +(32) CometFilter +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#25) + +(33) CometBroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] + +(34) CometBroadcastHashJoin +Left output [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Right output [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_state#11], [ctr_state#21], Inner, (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#25), BuildRight + +(35) CometProject +Input [5]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_customer_sk#10, ctr_total_return#12], [ctr_customer_sk#10, ctr_total_return#12] + +(36) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] + +(37) CometFilter +Input [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Condition : (isnotnull(c_customer_sk#26) AND isnotnull(c_current_addr_sk#28)) + +(38) CometBroadcastExchange +Input [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] + +(39) CometBroadcastHashJoin +Left output [2]: [ctr_customer_sk#10, ctr_total_return#12] +Right output [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [ctr_customer_sk#10], [c_customer_sk#26], Inner, BuildRight + +(40) CometProject +Input [16]: [ctr_customer_sk#10, ctr_total_return#12, c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39], [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] + +(41) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#40, ca_state#41] +Arguments: [ca_address_sk#40, ca_state#41] + +(42) CometFilter +Input [2]: [ca_address_sk#40, ca_state#41] +Condition : ((isnotnull(ca_state#41) AND (ca_state#41 = GA)) AND isnotnull(ca_address_sk#40)) + +(43) CometProject +Input [2]: [ca_address_sk#40, ca_state#41] +Arguments: [ca_address_sk#40], [ca_address_sk#40] + +(44) CometBroadcastExchange +Input [1]: [ca_address_sk#40] +Arguments: [ca_address_sk#40] + +(45) CometBroadcastHashJoin +Left output [14]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Right output [1]: [ca_address_sk#40] +Arguments: [c_current_addr_sk#28], [ca_address_sk#40], Inner, BuildRight + +(46) CometProject +Input [15]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ca_address_sk#40] +Arguments: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] + +(47) CometTakeOrderedAndProject +Input [13]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#27 ASC NULLS FIRST,c_salutation#29 ASC NULLS FIRST,c_first_name#30 ASC NULLS FIRST,c_last_name#31 ASC NULLS FIRST,c_preferred_cust_flag#32 ASC NULLS FIRST,c_birth_day#33 ASC NULLS FIRST,c_birth_month#34 ASC NULLS FIRST,c_birth_year#35 ASC NULLS FIRST,c_birth_country#36 ASC NULLS FIRST,c_login#37 ASC NULLS FIRST,c_email_address#38 ASC NULLS FIRST,c_last_review_date#39 ASC NULLS FIRST,ctr_total_return#12 ASC NULLS FIRST], output=[c_customer_id#27,c_salutation#29,c_first_name#30,c_last_name#31,c_preferred_cust_flag#32,c_birth_day#33,c_birth_month#34,c_birth_year#35,c_birth_country#36,c_login#37,c_email_address#38,c_last_review_date#39,ctr_total_return#12]), [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12], 100, [c_customer_id#27 ASC NULLS FIRST, c_salutation#29 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, c_last_name#31 ASC NULLS FIRST, c_preferred_cust_flag#32 ASC NULLS FIRST, c_birth_day#33 ASC NULLS FIRST, c_birth_month#34 ASC NULLS FIRST, c_birth_year#35 ASC NULLS FIRST, c_birth_country#36 ASC NULLS FIRST, c_login#37 ASC NULLS FIRST, c_email_address#38 ASC NULLS FIRST, c_last_review_date#39 ASC NULLS FIRST, ctr_total_return#12 ASC NULLS FIRST], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] + +(48) ColumnarToRow [codegen id : 1] +Input [13]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d36117ef8e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + CometProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + CometBroadcastHashJoin [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ca_address_sk] + CometProject [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometBroadcastHashJoin [ctr_customer_sk,ctr_total_return,c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometProject [ctr_customer_sk,ctr_total_return] + CometBroadcastHashJoin [ctr_customer_sk,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2),ctr_state] + CometFilter [ctr_customer_sk,ctr_state,ctr_total_return] + CometHashAggregate [ctr_customer_sk,ctr_state,ctr_total_return,wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] + CometExchange [wr_returning_customer_sk,ca_state] #1 + CometHashAggregate [wr_returning_customer_sk,ca_state,sum,wr_return_amt] + CometProject [wr_returning_customer_sk,wr_return_amt,ca_state] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,ca_address_sk,ca_state] + CometProject [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk,d_date_sk] + CometFilter [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_state] #4 + CometFilter [(avg(ctr_total_return) * 1.2),ctr_state] + CometHashAggregate [(avg(ctr_total_return) * 1.2),ctr_state,sum,count,avg(ctr_total_return)] + CometExchange [ctr_state] #5 + CometHashAggregate [ctr_state,sum,count,ctr_total_return] + CometHashAggregate [ctr_state,ctr_total_return,wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] + CometExchange [wr_returning_customer_sk,ca_state] #6 + CometHashAggregate [wr_returning_customer_sk,ca_state,sum,wr_return_amt] + CometProject [wr_returning_customer_sk,wr_return_amt,ca_state] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,ca_address_sk,ca_state] + CometProject [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk,d_date_sk] + CometFilter [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + ReusedExchange [ca_address_sk,ca_state] #3 + CometBroadcastExchange [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] #7 + CometFilter [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometBroadcastExchange [ca_address_sk] #8 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..22684d93ed --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/explain.txt @@ -0,0 +1,273 @@ +== Physical Plan == +* ColumnarToRow (48) ++- CometTakeOrderedAndProject (47) + +- CometProject (46) + +- CometBroadcastHashJoin (45) + :- CometProject (40) + : +- CometBroadcastHashJoin (39) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometFilter (17) + : : : +- CometHashAggregate (16) + : : : +- CometExchange (15) + : : : +- CometHashAggregate (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.web_returns (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.customer_address (9) + : : +- CometBroadcastExchange (33) + : : +- CometFilter (32) + : : +- CometHashAggregate (31) + : : +- CometExchange (30) + : : +- CometHashAggregate (29) + : : +- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometFilter (19) + : : : : +- CometScan parquet spark_catalog.default.web_returns (18) + : : : +- ReusedExchange (20) + : : +- ReusedExchange (23) + : +- CometBroadcastExchange (38) + : +- CometFilter (37) + : +- CometScan parquet spark_catalog.default.customer (36) + +- CometBroadcastExchange (44) + +- CometProject (43) + +- CometFilter (42) + +- CometScan parquet spark_catalog.default.customer_address (41) + + +(1) CometScan parquet spark_catalog.default.web_returns +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#4)] +PushedFilters: [IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : (isnotnull(wr_returning_addr_sk#2) AND isnotnull(wr_returning_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [wr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] +Arguments: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3], [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] + +(9) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(12) CometBroadcastHashJoin +Left output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Right output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [wr_returning_addr_sk#2], [ca_address_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] +Arguments: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8], [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] + +(14) CometHashAggregate +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] + +(15) CometExchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#9] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#9] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] + +(17) CometFilter +Input [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(18) CometScan parquet spark_catalog.default.web_returns +Output [4]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#16)] +PushedFilters: [IsNotNull(wr_returning_addr_sk)] +ReadSchema: struct + +(19) CometFilter +Input [4]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] +Condition : isnotnull(wr_returning_addr_sk#14) + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#17] + +(21) CometBroadcastHashJoin +Left output [4]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16] +Right output [1]: [d_date_sk#17] +Arguments: [wr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + +(22) CometProject +Input [5]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, wr_returned_date_sk#16, d_date_sk#17] +Arguments: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15], [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15] + +(23) ReusedExchange [Reuses operator id: 11] +Output [2]: [ca_address_sk#18, ca_state#19] + +(24) CometBroadcastHashJoin +Left output [3]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15] +Right output [2]: [ca_address_sk#18, ca_state#19] +Arguments: [wr_returning_addr_sk#14], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [5]: [wr_returning_customer_sk#13, wr_returning_addr_sk#14, wr_return_amt#15, ca_address_sk#18, ca_state#19] +Arguments: [wr_returning_customer_sk#13, wr_return_amt#15, ca_state#19], [wr_returning_customer_sk#13, wr_return_amt#15, ca_state#19] + +(26) CometHashAggregate +Input [3]: [wr_returning_customer_sk#13, wr_return_amt#15, ca_state#19] +Keys [2]: [wr_returning_customer_sk#13, ca_state#19] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#15))] + +(27) CometExchange +Input [3]: [wr_returning_customer_sk#13, ca_state#19, sum#20] +Arguments: hashpartitioning(wr_returning_customer_sk#13, ca_state#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [3]: [wr_returning_customer_sk#13, ca_state#19, sum#20] +Keys [2]: [wr_returning_customer_sk#13, ca_state#19] +Functions [1]: [sum(UnscaledValue(wr_return_amt#15))] + +(29) CometHashAggregate +Input [2]: [ctr_state#21, ctr_total_return#22] +Keys [1]: [ctr_state#21] +Functions [1]: [partial_avg(ctr_total_return#22)] + +(30) CometExchange +Input [3]: [ctr_state#21, sum#23, count#24] +Arguments: hashpartitioning(ctr_state#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(31) CometHashAggregate +Input [3]: [ctr_state#21, sum#23, count#24] +Keys [1]: [ctr_state#21] +Functions [1]: [avg(ctr_total_return#22)] + +(32) CometFilter +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#25) + +(33) CometBroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] + +(34) CometBroadcastHashJoin +Left output [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Right output [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_state#11], [ctr_state#21], Inner, (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#25), BuildRight + +(35) CometProject +Input [5]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_customer_sk#10, ctr_total_return#12], [ctr_customer_sk#10, ctr_total_return#12] + +(36) CometScan parquet spark_catalog.default.customer +Output [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(37) CometFilter +Input [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Condition : (isnotnull(c_customer_sk#26) AND isnotnull(c_current_addr_sk#28)) + +(38) CometBroadcastExchange +Input [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] + +(39) CometBroadcastHashJoin +Left output [2]: [ctr_customer_sk#10, ctr_total_return#12] +Right output [14]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [ctr_customer_sk#10], [c_customer_sk#26], Inner, BuildRight + +(40) CometProject +Input [16]: [ctr_customer_sk#10, ctr_total_return#12, c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Arguments: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39], [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] + +(41) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#40, ca_state#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(42) CometFilter +Input [2]: [ca_address_sk#40, ca_state#41] +Condition : ((isnotnull(ca_state#41) AND (ca_state#41 = GA)) AND isnotnull(ca_address_sk#40)) + +(43) CometProject +Input [2]: [ca_address_sk#40, ca_state#41] +Arguments: [ca_address_sk#40], [ca_address_sk#40] + +(44) CometBroadcastExchange +Input [1]: [ca_address_sk#40] +Arguments: [ca_address_sk#40] + +(45) CometBroadcastHashJoin +Left output [14]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39] +Right output [1]: [ca_address_sk#40] +Arguments: [c_current_addr_sk#28], [ca_address_sk#40], Inner, BuildRight + +(46) CometProject +Input [15]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ca_address_sk#40] +Arguments: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] + +(47) CometTakeOrderedAndProject +Input [13]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#27 ASC NULLS FIRST,c_salutation#29 ASC NULLS FIRST,c_first_name#30 ASC NULLS FIRST,c_last_name#31 ASC NULLS FIRST,c_preferred_cust_flag#32 ASC NULLS FIRST,c_birth_day#33 ASC NULLS FIRST,c_birth_month#34 ASC NULLS FIRST,c_birth_year#35 ASC NULLS FIRST,c_birth_country#36 ASC NULLS FIRST,c_login#37 ASC NULLS FIRST,c_email_address#38 ASC NULLS FIRST,c_last_review_date#39 ASC NULLS FIRST,ctr_total_return#12 ASC NULLS FIRST], output=[c_customer_id#27,c_salutation#29,c_first_name#30,c_last_name#31,c_preferred_cust_flag#32,c_birth_day#33,c_birth_month#34,c_birth_year#35,c_birth_country#36,c_login#37,c_email_address#38,c_last_review_date#39,ctr_total_return#12]), [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12], 100, [c_customer_id#27 ASC NULLS FIRST, c_salutation#29 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, c_last_name#31 ASC NULLS FIRST, c_preferred_cust_flag#32 ASC NULLS FIRST, c_birth_day#33 ASC NULLS FIRST, c_birth_month#34 ASC NULLS FIRST, c_birth_year#35 ASC NULLS FIRST, c_birth_country#36 ASC NULLS FIRST, c_login#37 ASC NULLS FIRST, c_email_address#38 ASC NULLS FIRST, c_last_review_date#39 ASC NULLS FIRST, ctr_total_return#12 ASC NULLS FIRST], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] + +(48) ColumnarToRow [codegen id : 1] +Input [13]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_day#33, c_birth_month#34, c_birth_year#35, c_birth_country#36, c_login#37, c_email_address#38, c_last_review_date#39, ctr_total_return#12] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..deb5920ef4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q30.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + CometProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + CometBroadcastHashJoin [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ca_address_sk] + CometProject [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometBroadcastHashJoin [ctr_customer_sk,ctr_total_return,c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometProject [ctr_customer_sk,ctr_total_return] + CometBroadcastHashJoin [ctr_customer_sk,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2),ctr_state] + CometFilter [ctr_customer_sk,ctr_state,ctr_total_return] + CometHashAggregate [ctr_customer_sk,ctr_state,ctr_total_return,wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] + CometExchange [wr_returning_customer_sk,ca_state] #1 + CometHashAggregate [wr_returning_customer_sk,ca_state,sum,wr_return_amt] + CometProject [wr_returning_customer_sk,wr_return_amt,ca_state] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,ca_address_sk,ca_state] + CometProject [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk,d_date_sk] + CometFilter [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_state] #4 + CometFilter [(avg(ctr_total_return) * 1.2),ctr_state] + CometHashAggregate [(avg(ctr_total_return) * 1.2),ctr_state,sum,count,avg(ctr_total_return)] + CometExchange [ctr_state] #5 + CometHashAggregate [ctr_state,sum,count,ctr_total_return] + CometHashAggregate [ctr_state,ctr_total_return,wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] + CometExchange [wr_returning_customer_sk,ca_state] #6 + CometHashAggregate [wr_returning_customer_sk,ca_state,sum,wr_return_amt] + CometProject [wr_returning_customer_sk,wr_return_amt,ca_state] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,ca_address_sk,ca_state] + CometProject [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk,d_date_sk] + CometFilter [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + ReusedExchange [ca_address_sk,ca_state] #3 + CometBroadcastExchange [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] #7 + CometFilter [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometBroadcastExchange [ca_address_sk] #8 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/explain.txt new file mode 100644 index 0000000000..efa61c768a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/explain.txt @@ -0,0 +1,310 @@ +== Physical Plan == +* ColumnarToRow (59) ++- CometSort (58) + +- CometColumnarExchange (57) + +- CometProject (56) + +- CometBroadcastHashJoin (55) + :- CometProject (53) + : +- CometBroadcastHashJoin (52) + : :- CometBroadcastHashJoin (50) + : : :- CometProject (46) + : : : +- CometBroadcastHashJoin (45) + : : : :- CometBroadcastHashJoin (30) + : : : : :- CometHashAggregate (15) + : : : : : +- CometExchange (14) + : : : : : +- CometHashAggregate (13) + : : : : : +- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + : : : : +- CometBroadcastExchange (29) + : : : : +- CometHashAggregate (28) + : : : : +- CometExchange (27) + : : : : +- CometHashAggregate (26) + : : : : +- CometProject (25) + : : : : +- CometBroadcastHashJoin (24) + : : : : :- CometProject (22) + : : : : : +- CometBroadcastHashJoin (21) + : : : : : :- CometFilter (17) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (16) + : : : : : +- CometBroadcastExchange (20) + : : : : : +- CometFilter (19) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (18) + : : : : +- ReusedExchange (23) + : : : +- CometBroadcastExchange (44) + : : : +- CometHashAggregate (43) + : : : +- CometExchange (42) + : : : +- CometHashAggregate (41) + : : : +- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (37) + : : : : +- CometBroadcastHashJoin (36) + : : : : :- CometFilter (32) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (31) + : : : : +- CometBroadcastExchange (35) + : : : : +- CometFilter (34) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (33) + : : : +- ReusedExchange (38) + : : +- CometBroadcastExchange (49) + : : +- CometHashAggregate (48) + : : +- ReusedExchange (47) + : +- ReusedExchange (51) + +- ReusedExchange (54) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_addr_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(5) CometBroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6], [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ca_address_sk#7, ca_county#8] + +(9) CometFilter +Input [2]: [ca_address_sk#7, ca_county#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_county#8)) + +(10) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ca_address_sk#7, ca_county#8] + +(11) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] +Right output [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ss_addr_sk#1], [ca_address_sk#7], Inner, BuildRight + +(12) CometProject +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_address_sk#7, ca_county#8] +Arguments: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8], [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] + +(13) CometHashAggregate +Input [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(14) CometExchange +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#9] +Arguments: hashpartitioning(ca_county#8, d_qoy#6, d_year#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#9] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(16) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] +Arguments: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] + +(17) CometFilter +Input [3]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_addr_sk#10) + +(18) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [d_date_sk#13, d_year#14, d_qoy#15] + +(19) CometFilter +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Condition : ((((isnotnull(d_qoy#15) AND isnotnull(d_year#14)) AND (d_qoy#15 = 2)) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(20) CometBroadcastExchange +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [d_date_sk#13, d_year#14, d_qoy#15] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] +Right output [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(22) CometProject +Input [6]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15], [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15] + +(23) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#16, ca_county#17] + +(24) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15] +Right output [2]: [ca_address_sk#16, ca_county#17] +Arguments: [ss_addr_sk#10], [ca_address_sk#16], Inner, BuildRight + +(25) CometProject +Input [6]: [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_address_sk#16, ca_county#17] +Arguments: [ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_county#17], [ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_county#17] + +(26) CometHashAggregate +Input [4]: [ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_county#17] +Keys [3]: [ca_county#17, d_qoy#15, d_year#14] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#11))] + +(27) CometExchange +Input [4]: [ca_county#17, d_qoy#15, d_year#14, sum#18] +Arguments: hashpartitioning(ca_county#17, d_qoy#15, d_year#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [4]: [ca_county#17, d_qoy#15, d_year#14, sum#18] +Keys [3]: [ca_county#17, d_qoy#15, d_year#14] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#11))] + +(29) CometBroadcastExchange +Input [2]: [ca_county#17, store_sales#19] +Arguments: [ca_county#17, store_sales#19] + +(30) CometBroadcastHashJoin +Left output [3]: [ca_county#8, d_year#5, store_sales#20] +Right output [2]: [ca_county#17, store_sales#19] +Arguments: [ca_county#8], [ca_county#17], Inner, BuildRight + +(31) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(32) CometFilter +Input [3]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_addr_sk#21) + +(33) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [d_date_sk#24, d_year#25, d_qoy#26] + +(34) CometFilter +Input [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Condition : ((((isnotnull(d_qoy#26) AND isnotnull(d_year#25)) AND (d_qoy#26 = 3)) AND (d_year#25 = 2000)) AND isnotnull(d_date_sk#24)) + +(35) CometBroadcastExchange +Input [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [d_date_sk#24, d_year#25, d_qoy#26] + +(36) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Right output [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + +(37) CometProject +Input [6]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23, d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26], [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26] + +(38) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#27, ca_county#28] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26] +Right output [2]: [ca_address_sk#27, ca_county#28] +Arguments: [ss_addr_sk#21], [ca_address_sk#27], Inner, BuildRight + +(40) CometProject +Input [6]: [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_address_sk#27, ca_county#28] +Arguments: [ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_county#28], [ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_county#28] + +(41) CometHashAggregate +Input [4]: [ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_county#28] +Keys [3]: [ca_county#28, d_qoy#26, d_year#25] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#22))] + +(42) CometExchange +Input [4]: [ca_county#28, d_qoy#26, d_year#25, sum#29] +Arguments: hashpartitioning(ca_county#28, d_qoy#26, d_year#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(43) CometHashAggregate +Input [4]: [ca_county#28, d_qoy#26, d_year#25, sum#29] +Keys [3]: [ca_county#28, d_qoy#26, d_year#25] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#22))] + +(44) CometBroadcastExchange +Input [2]: [ca_county#28, store_sales#30] +Arguments: [ca_county#28, store_sales#30] + +(45) CometBroadcastHashJoin +Left output [5]: [ca_county#8, d_year#5, store_sales#20, ca_county#17, store_sales#19] +Right output [2]: [ca_county#28, store_sales#30] +Arguments: [ca_county#17], [ca_county#28], Inner, BuildRight + +(46) CometProject +Input [7]: [ca_county#8, d_year#5, store_sales#20, ca_county#17, store_sales#19, ca_county#28, store_sales#30] +Arguments: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30], [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30] + +(47) ReusedExchange [Reuses operator id: 14] +Output [4]: [ca_county#31, d_qoy#32, d_year#33, sum#34] + +(48) CometHashAggregate +Input [4]: [ca_county#31, d_qoy#32, d_year#33, sum#34] +Keys [3]: [ca_county#31, d_qoy#32, d_year#33] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] + +(49) CometBroadcastExchange +Input [2]: [ca_county#31, web_sales#36] +Arguments: [ca_county#31, web_sales#36] + +(50) CometBroadcastHashJoin +Left output [5]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30] +Right output [2]: [ca_county#31, web_sales#36] +Arguments: [ca_county#8], [ca_county#31], Inner, BuildRight + +(51) ReusedExchange [Reuses operator id: 29] +Output [2]: [ca_county#37, web_sales#38] + +(52) CometBroadcastHashJoin +Left output [7]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#31, web_sales#36] +Right output [2]: [ca_county#37, web_sales#38] +Arguments: [ca_county#31], [ca_county#37], Inner, (CASE WHEN (web_sales#36 > 0.00) THEN (web_sales#38 / web_sales#36) END > CASE WHEN (store_sales#20 > 0.00) THEN (store_sales#19 / store_sales#20) END), BuildRight + +(53) CometProject +Input [9]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#31, web_sales#36, ca_county#37, web_sales#38] +Arguments: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#31, web_sales#36, web_sales#38], [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#31, web_sales#36, web_sales#38] + +(54) ReusedExchange [Reuses operator id: 44] +Output [2]: [ca_county#39, web_sales#40] + +(55) CometBroadcastHashJoin +Left output [8]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#31, web_sales#36, web_sales#38] +Right output [2]: [ca_county#39, web_sales#40] +Arguments: [ca_county#31], [ca_county#39], Inner, (CASE WHEN (web_sales#38 > 0.00) THEN (web_sales#40 / web_sales#38) END > CASE WHEN (store_sales#19 > 0.00) THEN (store_sales#30 / store_sales#19) END), BuildRight + +(56) CometProject +Input [10]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#31, web_sales#36, web_sales#38, ca_county#39, web_sales#40] +Arguments: [ca_county#8, d_year#5, web_q1_q2_increase#41, store_q1_q2_increase#42, web_q2_q3_increase#43, store_q2_q3_increase#44], [ca_county#8, d_year#5, (web_sales#38 / web_sales#36) AS web_q1_q2_increase#41, (store_sales#19 / store_sales#20) AS store_q1_q2_increase#42, (web_sales#40 / web_sales#38) AS web_q2_q3_increase#43, (store_sales#30 / store_sales#19) AS store_q2_q3_increase#44] + +(57) CometColumnarExchange +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#41, store_q1_q2_increase#42, web_q2_q3_increase#43, store_q2_q3_increase#44] +Arguments: rangepartitioning(ca_county#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] + +(58) CometSort +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#41, store_q1_q2_increase#42, web_q2_q3_increase#43, store_q2_q3_increase#44] +Arguments: [ca_county#8, d_year#5, web_q1_q2_increase#41, store_q1_q2_increase#42, web_q2_q3_increase#43, store_q2_q3_increase#44], [ca_county#8 ASC NULLS FIRST] + +(59) ColumnarToRow [codegen id : 1] +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#41, store_q1_q2_increase#42, web_q2_q3_increase#43, store_q2_q3_increase#44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/simplified.txt new file mode 100644 index 0000000000..161acbfd53 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_datafusion/simplified.txt @@ -0,0 +1,61 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [ca_county,d_year,web_q1_q2_increase,store_q1_q2_increase,web_q2_q3_increase,store_q2_q3_increase] + CometColumnarExchange [ca_county] #1 + CometProject [web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] [ca_county,d_year,web_q1_q2_increase,store_q1_q2_increase,web_q2_q3_increase,store_q2_q3_increase] + CometBroadcastHashJoin [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales,ca_county,web_sales] + CometProject [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,ca_county,web_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales] + CometProject [ca_county,d_year,store_sales,store_sales,store_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,ca_county,store_sales,ca_county,store_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,ca_county,store_sales] + CometHashAggregate [ca_county,d_year,store_sales,d_qoy,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #2 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #3 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [ca_address_sk,ca_county] #4 + CometFilter [ca_address_sk,ca_county] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county] + CometBroadcastExchange [ca_county,store_sales] #5 + CometHashAggregate [ca_county,store_sales,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #6 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #7 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + ReusedExchange [ca_address_sk,ca_county] #4 + CometBroadcastExchange [ca_county,store_sales] #8 + CometHashAggregate [ca_county,store_sales,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #9 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #10 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + ReusedExchange [ca_address_sk,ca_county] #4 + CometBroadcastExchange [ca_county,web_sales] #11 + CometHashAggregate [ca_county,web_sales,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] + ReusedExchange [ca_county,d_qoy,d_year,sum] #2 + ReusedExchange [ca_county,web_sales] #5 + ReusedExchange [ca_county,web_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..93e5b08a9a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/explain.txt @@ -0,0 +1,509 @@ +== Physical Plan == +* ColumnarToRow (90) ++- CometSort (89) + +- CometColumnarExchange (88) + +- CometProject (87) + +- CometBroadcastHashJoin (86) + :- CometProject (73) + : +- CometBroadcastHashJoin (72) + : :- CometBroadcastHashJoin (59) + : : :- CometProject (46) + : : : +- CometBroadcastHashJoin (45) + : : : :- CometBroadcastHashJoin (30) + : : : : :- CometHashAggregate (15) + : : : : : +- CometExchange (14) + : : : : : +- CometHashAggregate (13) + : : : : : +- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometScan parquet spark_catalog.default.customer_address (8) + : : : : +- CometBroadcastExchange (29) + : : : : +- CometHashAggregate (28) + : : : : +- CometExchange (27) + : : : : +- CometHashAggregate (26) + : : : : +- CometProject (25) + : : : : +- CometBroadcastHashJoin (24) + : : : : :- CometProject (22) + : : : : : +- CometBroadcastHashJoin (21) + : : : : : :- CometFilter (17) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (16) + : : : : : +- CometBroadcastExchange (20) + : : : : : +- CometFilter (19) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (18) + : : : : +- ReusedExchange (23) + : : : +- CometBroadcastExchange (44) + : : : +- CometHashAggregate (43) + : : : +- CometExchange (42) + : : : +- CometHashAggregate (41) + : : : +- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (37) + : : : : +- CometBroadcastHashJoin (36) + : : : : :- CometFilter (32) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (31) + : : : : +- CometBroadcastExchange (35) + : : : : +- CometFilter (34) + : : : : +- CometScan parquet spark_catalog.default.date_dim (33) + : : : +- ReusedExchange (38) + : : +- CometBroadcastExchange (58) + : : +- CometHashAggregate (57) + : : +- CometExchange (56) + : : +- CometHashAggregate (55) + : : +- CometProject (54) + : : +- CometBroadcastHashJoin (53) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometFilter (48) + : : : : +- CometScan parquet spark_catalog.default.web_sales (47) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (52) + : +- CometBroadcastExchange (71) + : +- CometHashAggregate (70) + : +- CometExchange (69) + : +- CometHashAggregate (68) + : +- CometProject (67) + : +- CometBroadcastHashJoin (66) + : :- CometProject (64) + : : +- CometBroadcastHashJoin (63) + : : :- CometFilter (61) + : : : +- CometScan parquet spark_catalog.default.web_sales (60) + : : +- ReusedExchange (62) + : +- ReusedExchange (65) + +- CometBroadcastExchange (85) + +- CometHashAggregate (84) + +- CometExchange (83) + +- CometHashAggregate (82) + +- CometProject (81) + +- CometBroadcastHashJoin (80) + :- CometProject (78) + : +- CometBroadcastHashJoin (77) + : :- CometFilter (75) + : : +- CometScan parquet spark_catalog.default.web_sales (74) + : +- ReusedExchange (76) + +- ReusedExchange (79) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_addr_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(5) CometBroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6], [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] + +(8) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_county#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [ca_address_sk#7, ca_county#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_county#8)) + +(10) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ca_address_sk#7, ca_county#8] + +(11) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] +Right output [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ss_addr_sk#1], [ca_address_sk#7], Inner, BuildRight + +(12) CometProject +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_address_sk#7, ca_county#8] +Arguments: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8], [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] + +(13) CometHashAggregate +Input [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(14) CometExchange +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#9] +Arguments: hashpartitioning(ca_county#8, d_qoy#6, d_year#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#9] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(16) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(17) CometFilter +Input [3]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_addr_sk#10) + +(18) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) CometFilter +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Condition : ((((isnotnull(d_qoy#15) AND isnotnull(d_year#14)) AND (d_qoy#15 = 2)) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(20) CometBroadcastExchange +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [d_date_sk#13, d_year#14, d_qoy#15] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12] +Right output [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(22) CometProject +Input [6]: [ss_addr_sk#10, ss_ext_sales_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14, d_qoy#15] +Arguments: [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15], [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15] + +(23) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#16, ca_county#17] + +(24) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15] +Right output [2]: [ca_address_sk#16, ca_county#17] +Arguments: [ss_addr_sk#10], [ca_address_sk#16], Inner, BuildRight + +(25) CometProject +Input [6]: [ss_addr_sk#10, ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_address_sk#16, ca_county#17] +Arguments: [ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_county#17], [ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_county#17] + +(26) CometHashAggregate +Input [4]: [ss_ext_sales_price#11, d_year#14, d_qoy#15, ca_county#17] +Keys [3]: [ca_county#17, d_qoy#15, d_year#14] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#11))] + +(27) CometExchange +Input [4]: [ca_county#17, d_qoy#15, d_year#14, sum#18] +Arguments: hashpartitioning(ca_county#17, d_qoy#15, d_year#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [4]: [ca_county#17, d_qoy#15, d_year#14, sum#18] +Keys [3]: [ca_county#17, d_qoy#15, d_year#14] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#11))] + +(29) CometBroadcastExchange +Input [2]: [ca_county#17, store_sales#19] +Arguments: [ca_county#17, store_sales#19] + +(30) CometBroadcastHashJoin +Left output [3]: [ca_county#8, d_year#5, store_sales#20] +Right output [2]: [ca_county#17, store_sales#19] +Arguments: [ca_county#8], [ca_county#17], Inner, BuildRight + +(31) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#23)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(32) CometFilter +Input [3]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_addr_sk#21) + +(33) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) CometFilter +Input [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Condition : ((((isnotnull(d_qoy#26) AND isnotnull(d_year#25)) AND (d_qoy#26 = 3)) AND (d_year#25 = 2000)) AND isnotnull(d_date_sk#24)) + +(35) CometBroadcastExchange +Input [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [d_date_sk#24, d_year#25, d_qoy#26] + +(36) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Right output [3]: [d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + +(37) CometProject +Input [6]: [ss_addr_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23, d_date_sk#24, d_year#25, d_qoy#26] +Arguments: [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26], [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26] + +(38) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#27, ca_county#28] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26] +Right output [2]: [ca_address_sk#27, ca_county#28] +Arguments: [ss_addr_sk#21], [ca_address_sk#27], Inner, BuildRight + +(40) CometProject +Input [6]: [ss_addr_sk#21, ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_address_sk#27, ca_county#28] +Arguments: [ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_county#28], [ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_county#28] + +(41) CometHashAggregate +Input [4]: [ss_ext_sales_price#22, d_year#25, d_qoy#26, ca_county#28] +Keys [3]: [ca_county#28, d_qoy#26, d_year#25] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#22))] + +(42) CometExchange +Input [4]: [ca_county#28, d_qoy#26, d_year#25, sum#29] +Arguments: hashpartitioning(ca_county#28, d_qoy#26, d_year#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(43) CometHashAggregate +Input [4]: [ca_county#28, d_qoy#26, d_year#25, sum#29] +Keys [3]: [ca_county#28, d_qoy#26, d_year#25] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#22))] + +(44) CometBroadcastExchange +Input [2]: [ca_county#28, store_sales#30] +Arguments: [ca_county#28, store_sales#30] + +(45) CometBroadcastHashJoin +Left output [5]: [ca_county#8, d_year#5, store_sales#20, ca_county#17, store_sales#19] +Right output [2]: [ca_county#28, store_sales#30] +Arguments: [ca_county#17], [ca_county#28], Inner, BuildRight + +(46) CometProject +Input [7]: [ca_county#8, d_year#5, store_sales#20, ca_county#17, store_sales#19, ca_county#28, store_sales#30] +Arguments: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30], [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30] + +(47) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(48) CometFilter +Input [3]: [ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : isnotnull(ws_bill_addr_sk#31) + +(49) ReusedExchange [Reuses operator id: 5] +Output [3]: [d_date_sk#34, d_year#35, d_qoy#36] + +(50) CometBroadcastHashJoin +Left output [3]: [ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Right output [3]: [d_date_sk#34, d_year#35, d_qoy#36] +Arguments: [ws_sold_date_sk#33], [d_date_sk#34], Inner, BuildRight + +(51) CometProject +Input [6]: [ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34, d_year#35, d_qoy#36] +Arguments: [ws_bill_addr_sk#31, ws_ext_sales_price#32, d_year#35, d_qoy#36], [ws_bill_addr_sk#31, ws_ext_sales_price#32, d_year#35, d_qoy#36] + +(52) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#37, ca_county#38] + +(53) CometBroadcastHashJoin +Left output [4]: [ws_bill_addr_sk#31, ws_ext_sales_price#32, d_year#35, d_qoy#36] +Right output [2]: [ca_address_sk#37, ca_county#38] +Arguments: [ws_bill_addr_sk#31], [ca_address_sk#37], Inner, BuildRight + +(54) CometProject +Input [6]: [ws_bill_addr_sk#31, ws_ext_sales_price#32, d_year#35, d_qoy#36, ca_address_sk#37, ca_county#38] +Arguments: [ws_ext_sales_price#32, d_year#35, d_qoy#36, ca_county#38], [ws_ext_sales_price#32, d_year#35, d_qoy#36, ca_county#38] + +(55) CometHashAggregate +Input [4]: [ws_ext_sales_price#32, d_year#35, d_qoy#36, ca_county#38] +Keys [3]: [ca_county#38, d_qoy#36, d_year#35] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] + +(56) CometExchange +Input [4]: [ca_county#38, d_qoy#36, d_year#35, sum#39] +Arguments: hashpartitioning(ca_county#38, d_qoy#36, d_year#35, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(57) CometHashAggregate +Input [4]: [ca_county#38, d_qoy#36, d_year#35, sum#39] +Keys [3]: [ca_county#38, d_qoy#36, d_year#35] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] + +(58) CometBroadcastExchange +Input [2]: [ca_county#38, web_sales#40] +Arguments: [ca_county#38, web_sales#40] + +(59) CometBroadcastHashJoin +Left output [5]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30] +Right output [2]: [ca_county#38, web_sales#40] +Arguments: [ca_county#8], [ca_county#38], Inner, BuildRight + +(60) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, ws_sold_date_sk#43] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#43)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(61) CometFilter +Input [3]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, ws_sold_date_sk#43] +Condition : isnotnull(ws_bill_addr_sk#41) + +(62) ReusedExchange [Reuses operator id: 20] +Output [3]: [d_date_sk#44, d_year#45, d_qoy#46] + +(63) CometBroadcastHashJoin +Left output [3]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, ws_sold_date_sk#43] +Right output [3]: [d_date_sk#44, d_year#45, d_qoy#46] +Arguments: [ws_sold_date_sk#43], [d_date_sk#44], Inner, BuildRight + +(64) CometProject +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, ws_sold_date_sk#43, d_date_sk#44, d_year#45, d_qoy#46] +Arguments: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#45, d_qoy#46], [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#45, d_qoy#46] + +(65) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#47, ca_county#48] + +(66) CometBroadcastHashJoin +Left output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#45, d_qoy#46] +Right output [2]: [ca_address_sk#47, ca_county#48] +Arguments: [ws_bill_addr_sk#41], [ca_address_sk#47], Inner, BuildRight + +(67) CometProject +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#45, d_qoy#46, ca_address_sk#47, ca_county#48] +Arguments: [ws_ext_sales_price#42, d_year#45, d_qoy#46, ca_county#48], [ws_ext_sales_price#42, d_year#45, d_qoy#46, ca_county#48] + +(68) CometHashAggregate +Input [4]: [ws_ext_sales_price#42, d_year#45, d_qoy#46, ca_county#48] +Keys [3]: [ca_county#48, d_qoy#46, d_year#45] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] + +(69) CometExchange +Input [4]: [ca_county#48, d_qoy#46, d_year#45, sum#49] +Arguments: hashpartitioning(ca_county#48, d_qoy#46, d_year#45, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(70) CometHashAggregate +Input [4]: [ca_county#48, d_qoy#46, d_year#45, sum#49] +Keys [3]: [ca_county#48, d_qoy#46, d_year#45] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] + +(71) CometBroadcastExchange +Input [2]: [ca_county#48, web_sales#50] +Arguments: [ca_county#48, web_sales#50] + +(72) CometBroadcastHashJoin +Left output [7]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#38, web_sales#40] +Right output [2]: [ca_county#48, web_sales#50] +Arguments: [ca_county#38], [ca_county#48], Inner, (CASE WHEN (web_sales#40 > 0.00) THEN (web_sales#50 / web_sales#40) END > CASE WHEN (store_sales#20 > 0.00) THEN (store_sales#19 / store_sales#20) END), BuildRight + +(73) CometProject +Input [9]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#38, web_sales#40, ca_county#48, web_sales#50] +Arguments: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#38, web_sales#40, web_sales#50], [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#38, web_sales#40, web_sales#50] + +(74) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#51, ws_ext_sales_price#52, ws_sold_date_sk#53] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#53)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(75) CometFilter +Input [3]: [ws_bill_addr_sk#51, ws_ext_sales_price#52, ws_sold_date_sk#53] +Condition : isnotnull(ws_bill_addr_sk#51) + +(76) ReusedExchange [Reuses operator id: 35] +Output [3]: [d_date_sk#54, d_year#55, d_qoy#56] + +(77) CometBroadcastHashJoin +Left output [3]: [ws_bill_addr_sk#51, ws_ext_sales_price#52, ws_sold_date_sk#53] +Right output [3]: [d_date_sk#54, d_year#55, d_qoy#56] +Arguments: [ws_sold_date_sk#53], [d_date_sk#54], Inner, BuildRight + +(78) CometProject +Input [6]: [ws_bill_addr_sk#51, ws_ext_sales_price#52, ws_sold_date_sk#53, d_date_sk#54, d_year#55, d_qoy#56] +Arguments: [ws_bill_addr_sk#51, ws_ext_sales_price#52, d_year#55, d_qoy#56], [ws_bill_addr_sk#51, ws_ext_sales_price#52, d_year#55, d_qoy#56] + +(79) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#57, ca_county#58] + +(80) CometBroadcastHashJoin +Left output [4]: [ws_bill_addr_sk#51, ws_ext_sales_price#52, d_year#55, d_qoy#56] +Right output [2]: [ca_address_sk#57, ca_county#58] +Arguments: [ws_bill_addr_sk#51], [ca_address_sk#57], Inner, BuildRight + +(81) CometProject +Input [6]: [ws_bill_addr_sk#51, ws_ext_sales_price#52, d_year#55, d_qoy#56, ca_address_sk#57, ca_county#58] +Arguments: [ws_ext_sales_price#52, d_year#55, d_qoy#56, ca_county#58], [ws_ext_sales_price#52, d_year#55, d_qoy#56, ca_county#58] + +(82) CometHashAggregate +Input [4]: [ws_ext_sales_price#52, d_year#55, d_qoy#56, ca_county#58] +Keys [3]: [ca_county#58, d_qoy#56, d_year#55] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#52))] + +(83) CometExchange +Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#59] +Arguments: hashpartitioning(ca_county#58, d_qoy#56, d_year#55, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(84) CometHashAggregate +Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#59] +Keys [3]: [ca_county#58, d_qoy#56, d_year#55] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#52))] + +(85) CometBroadcastExchange +Input [2]: [ca_county#58, web_sales#60] +Arguments: [ca_county#58, web_sales#60] + +(86) CometBroadcastHashJoin +Left output [8]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#38, web_sales#40, web_sales#50] +Right output [2]: [ca_county#58, web_sales#60] +Arguments: [ca_county#38], [ca_county#58], Inner, (CASE WHEN (web_sales#50 > 0.00) THEN (web_sales#60 / web_sales#50) END > CASE WHEN (store_sales#19 > 0.00) THEN (store_sales#30 / store_sales#19) END), BuildRight + +(87) CometProject +Input [10]: [ca_county#8, d_year#5, store_sales#20, store_sales#19, store_sales#30, ca_county#38, web_sales#40, web_sales#50, ca_county#58, web_sales#60] +Arguments: [ca_county#8, d_year#5, web_q1_q2_increase#61, store_q1_q2_increase#62, web_q2_q3_increase#63, store_q2_q3_increase#64], [ca_county#8, d_year#5, (web_sales#50 / web_sales#40) AS web_q1_q2_increase#61, (store_sales#19 / store_sales#20) AS store_q1_q2_increase#62, (web_sales#60 / web_sales#50) AS web_q2_q3_increase#63, (store_sales#30 / store_sales#19) AS store_q2_q3_increase#64] + +(88) CometColumnarExchange +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#61, store_q1_q2_increase#62, web_q2_q3_increase#63, store_q2_q3_increase#64] +Arguments: rangepartitioning(ca_county#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] + +(89) CometSort +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#61, store_q1_q2_increase#62, web_q2_q3_increase#63, store_q2_q3_increase#64] +Arguments: [ca_county#8, d_year#5, web_q1_q2_increase#61, store_q1_q2_increase#62, web_q2_q3_increase#63, store_q2_q3_increase#64], [ca_county#8 ASC NULLS FIRST] + +(90) ColumnarToRow [codegen id : 1] +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#61, store_q1_q2_increase#62, web_q2_q3_increase#63, store_q2_q3_increase#64] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..0fd6e74342 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q31.native_iceberg_compat/simplified.txt @@ -0,0 +1,92 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [ca_county,d_year,web_q1_q2_increase,store_q1_q2_increase,web_q2_q3_increase,store_q2_q3_increase] + CometColumnarExchange [ca_county] #1 + CometProject [web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] [ca_county,d_year,web_q1_q2_increase,store_q1_q2_increase,web_q2_q3_increase,store_q2_q3_increase] + CometBroadcastHashJoin [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales,ca_county,web_sales] + CometProject [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,ca_county,web_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales] + CometProject [ca_county,d_year,store_sales,store_sales,store_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,ca_county,store_sales,ca_county,store_sales] + CometBroadcastHashJoin [ca_county,d_year,store_sales,ca_county,store_sales] + CometHashAggregate [ca_county,d_year,store_sales,d_qoy,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #2 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #3 + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [ca_address_sk,ca_county] #4 + CometFilter [ca_address_sk,ca_county] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + CometBroadcastExchange [ca_county,store_sales] #5 + CometHashAggregate [ca_county,store_sales,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #6 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #7 + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + ReusedExchange [ca_address_sk,ca_county] #4 + CometBroadcastExchange [ca_county,store_sales] #8 + CometHashAggregate [ca_county,store_sales,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #9 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #10 + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + ReusedExchange [ca_address_sk,ca_county] #4 + CometBroadcastExchange [ca_county,web_sales] #11 + CometHashAggregate [ca_county,web_sales,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #12 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + ReusedExchange [ca_address_sk,ca_county] #4 + CometBroadcastExchange [ca_county,web_sales] #13 + CometHashAggregate [ca_county,web_sales,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #14 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year,d_qoy] #7 + ReusedExchange [ca_address_sk,ca_county] #4 + CometBroadcastExchange [ca_county,web_sales] #15 + CometHashAggregate [ca_county,web_sales,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [ca_county,d_qoy,d_year] #16 + CometHashAggregate [ca_county,d_qoy,d_year,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year,d_qoy] #10 + ReusedExchange [ca_address_sk,ca_county] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/explain.txt new file mode 100644 index 0000000000..b08d289beb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/explain.txt @@ -0,0 +1,159 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometHashAggregate (29) + +- CometExchange (28) + +- CometHashAggregate (27) + +- CometProject (26) + +- CometBroadcastHashJoin (25) + :- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometHashAggregate (19) + : +- CometExchange (18) + : +- CometHashAggregate (17) + : +- CometProject (16) + : +- CometBroadcastHashJoin (15) + : :- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (9) + : +- CometBroadcastExchange (14) + : +- CometProject (13) + : +- CometFilter (12) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (11) + +- ReusedExchange (24) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Arguments: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#1) AND isnotnull(cs_ext_discount_amt#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4, i_manufact_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(5) CometProject +Input [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4], [i_item_sk#4] + +(6) CometBroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: [i_item_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Right output [1]: [i_item_sk#4] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Arguments: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4], [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Arguments: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] + +(10) CometFilter +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Condition : isnotnull(cs_item_sk#6) + +(11) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(12) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(13) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(14) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(15) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(16) CometProject +Input [4]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8, d_date_sk#9] +Arguments: [cs_item_sk#6, cs_ext_discount_amt#7], [cs_item_sk#6, cs_ext_discount_amt#7] + +(17) CometHashAggregate +Input [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Keys [1]: [cs_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#7))] + +(18) CometExchange +Input [3]: [cs_item_sk#6, sum#11, count#12] +Arguments: hashpartitioning(cs_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(19) CometHashAggregate +Input [3]: [cs_item_sk#6, sum#11, count#12] +Keys [1]: [cs_item_sk#6] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))] + +(20) CometFilter +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#13) + +(21) CometBroadcastExchange +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Arguments: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] + +(22) CometBroadcastHashJoin +Left output [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Right output [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Arguments: [i_item_sk#4], [cs_item_sk#6], Inner, (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#13), BuildRight + +(23) CometProject +Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4, (1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Arguments: [cs_ext_discount_amt#2, cs_sold_date_sk#3], [cs_ext_discount_amt#2, cs_sold_date_sk#3] + +(24) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#14] + +(25) CometBroadcastHashJoin +Left output [2]: [cs_ext_discount_amt#2, cs_sold_date_sk#3] +Right output [1]: [d_date_sk#14] +Arguments: [cs_sold_date_sk#3], [d_date_sk#14], Inner, BuildRight + +(26) CometProject +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, d_date_sk#14] +Arguments: [cs_ext_discount_amt#2], [cs_ext_discount_amt#2] + +(27) CometHashAggregate +Input [1]: [cs_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#2))] + +(28) CometExchange +Input [1]: [sum#15] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [1]: [sum#15] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))] + +(30) ColumnarToRow [codegen id : 1] +Input [1]: [excess discount amount#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f7abc0b20d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [excess discount amount,sum,sum(UnscaledValue(cs_ext_discount_amt))] + CometExchange #1 + CometHashAggregate [sum,cs_ext_discount_amt] + CometProject [cs_ext_discount_amt] + CometBroadcastHashJoin [cs_ext_discount_amt,cs_sold_date_sk,d_date_sk] + CometProject [cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastHashJoin [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk,(1.3 * avg(cs_ext_discount_amt)),cs_item_sk] + CometProject [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + CometBroadcastHashJoin [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + CometFilter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk] #2 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_manufact_id] + CometBroadcastExchange [(1.3 * avg(cs_ext_discount_amt)),cs_item_sk] #3 + CometFilter [(1.3 * avg(cs_ext_discount_amt)),cs_item_sk] + CometHashAggregate [(1.3 * avg(cs_ext_discount_amt)),cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] + CometExchange [cs_item_sk] #4 + CometHashAggregate [cs_item_sk,sum,count,cs_ext_discount_amt] + CometProject [cs_item_sk,cs_ext_discount_amt] + CometBroadcastHashJoin [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk,d_date_sk] + CometFilter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f7180c4188 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/explain.txt @@ -0,0 +1,173 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometHashAggregate (29) + +- CometExchange (28) + +- CometHashAggregate (27) + +- CometProject (26) + +- CometBroadcastHashJoin (25) + :- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.item (3) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometHashAggregate (19) + : +- CometExchange (18) + : +- CometHashAggregate (17) + : +- CometProject (16) + : +- CometBroadcastHashJoin (15) + : :- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.catalog_sales (9) + : +- CometBroadcastExchange (14) + : +- CometProject (13) + : +- CometFilter (12) + : +- CometScan parquet spark_catalog.default.date_dim (11) + +- ReusedExchange (24) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#1) AND isnotnull(cs_ext_discount_amt#2)) + +(3) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(5) CometProject +Input [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4], [i_item_sk#4] + +(6) CometBroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: [i_item_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Right output [1]: [i_item_sk#4] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Arguments: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4], [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] + +(9) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Condition : isnotnull(cs_item_sk#6) + +(11) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(13) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(14) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(15) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(16) CometProject +Input [4]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8, d_date_sk#9] +Arguments: [cs_item_sk#6, cs_ext_discount_amt#7], [cs_item_sk#6, cs_ext_discount_amt#7] + +(17) CometHashAggregate +Input [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Keys [1]: [cs_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#7))] + +(18) CometExchange +Input [3]: [cs_item_sk#6, sum#11, count#12] +Arguments: hashpartitioning(cs_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(19) CometHashAggregate +Input [3]: [cs_item_sk#6, sum#11, count#12] +Keys [1]: [cs_item_sk#6] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))] + +(20) CometFilter +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#13) + +(21) CometBroadcastExchange +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Arguments: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] + +(22) CometBroadcastHashJoin +Left output [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Right output [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Arguments: [i_item_sk#4], [cs_item_sk#6], Inner, (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#13), BuildRight + +(23) CometProject +Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4, (1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#6] +Arguments: [cs_ext_discount_amt#2, cs_sold_date_sk#3], [cs_ext_discount_amt#2, cs_sold_date_sk#3] + +(24) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#14] + +(25) CometBroadcastHashJoin +Left output [2]: [cs_ext_discount_amt#2, cs_sold_date_sk#3] +Right output [1]: [d_date_sk#14] +Arguments: [cs_sold_date_sk#3], [d_date_sk#14], Inner, BuildRight + +(26) CometProject +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, d_date_sk#14] +Arguments: [cs_ext_discount_amt#2], [cs_ext_discount_amt#2] + +(27) CometHashAggregate +Input [1]: [cs_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#2))] + +(28) CometExchange +Input [1]: [sum#15] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [1]: [sum#15] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))] + +(30) ColumnarToRow [codegen id : 1] +Input [1]: [excess discount amount#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..566befe357 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q32.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [excess discount amount,sum,sum(UnscaledValue(cs_ext_discount_amt))] + CometExchange #1 + CometHashAggregate [sum,cs_ext_discount_amt] + CometProject [cs_ext_discount_amt] + CometBroadcastHashJoin [cs_ext_discount_amt,cs_sold_date_sk,d_date_sk] + CometProject [cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastHashJoin [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk,(1.3 * avg(cs_ext_discount_amt)),cs_item_sk] + CometProject [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + CometBroadcastHashJoin [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + CometFilter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk] #2 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + CometBroadcastExchange [(1.3 * avg(cs_ext_discount_amt)),cs_item_sk] #3 + CometFilter [(1.3 * avg(cs_ext_discount_amt)),cs_item_sk] + CometHashAggregate [(1.3 * avg(cs_ext_discount_amt)),cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] + CometExchange [cs_item_sk] #4 + CometHashAggregate [cs_item_sk,sum,count,cs_ext_discount_amt] + CometProject [cs_item_sk,cs_ext_discount_amt] + CometBroadcastHashJoin [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk,d_date_sk] + CometFilter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/explain.txt new file mode 100644 index 0000000000..2f4d022c1c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/explain.txt @@ -0,0 +1,258 @@ +== Physical Plan == +* ColumnarToRow (49) ++- CometTakeOrderedAndProject (48) + +- CometHashAggregate (47) + +- CometExchange (46) + +- CometHashAggregate (45) + +- CometUnion (44) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- CometHashAggregate (43) + +- ReusedExchange (42) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5, d_year#6, d_moy#7] + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8, ca_gmt_offset#9] + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [i_item_sk#10, i_manufact_id#11] + +(16) CometFilter +Input [2]: [i_item_sk#10, i_manufact_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_category#12, i_manufact_id#13] +Arguments: [i_category#12, i_manufact_id#13] + +(18) CometFilter +Input [2]: [i_category#12, i_manufact_id#13] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Electronics )) + +(19) CometProject +Input [2]: [i_category#12, i_manufact_id#13] +Arguments: [i_manufact_id#13], [i_manufact_id#13] + +(20) CometBroadcastExchange +Input [1]: [i_manufact_id#13] +Arguments: [i_manufact_id#13] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_manufact_id#11] +Right output [1]: [i_manufact_id#13] +Arguments: [i_manufact_id#11], [i_manufact_id#13], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [i_item_sk#10, i_manufact_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_manufact_id#11] +Arguments: [ss_ext_sales_price#3, i_manufact_id#11], [ss_ext_sales_price#3, i_manufact_id#11] + +(25) CometHashAggregate +Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(26) CometExchange +Input [2]: [i_manufact_id#11, sum#14] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [i_manufact_id#11, sum#14] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] + +(28) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(29) CometFilter +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_bill_addr_sk#15) AND isnotnull(cs_item_sk#16)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(31) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(32) CometProject +Input [5]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17], [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#20] + +(34) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] +Right output [1]: [ca_address_sk#20] +Arguments: [cs_bill_addr_sk#15], [ca_address_sk#20], Inner, BuildRight + +(35) CometProject +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, ca_address_sk#20] +Arguments: [cs_item_sk#16, cs_ext_sales_price#17], [cs_item_sk#16, cs_ext_sales_price#17] + +(36) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#21, i_manufact_id#22] + +(37) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#16, cs_ext_sales_price#17] +Right output [2]: [i_item_sk#21, i_manufact_id#22] +Arguments: [cs_item_sk#16], [i_item_sk#21], Inner, BuildRight + +(38) CometProject +Input [4]: [cs_item_sk#16, cs_ext_sales_price#17, i_item_sk#21, i_manufact_id#22] +Arguments: [cs_ext_sales_price#17, i_manufact_id#22], [cs_ext_sales_price#17, i_manufact_id#22] + +(39) CometHashAggregate +Input [2]: [cs_ext_sales_price#17, i_manufact_id#22] +Keys [1]: [i_manufact_id#22] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#17))] + +(40) CometExchange +Input [2]: [i_manufact_id#22, sum#23] +Arguments: hashpartitioning(i_manufact_id#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(41) CometHashAggregate +Input [2]: [i_manufact_id#22, sum#23] +Keys [1]: [i_manufact_id#22] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(42) ReusedExchange [Reuses operator id: 26] +Output [2]: [i_manufact_id#24, sum#25] + +(43) CometHashAggregate +Input [2]: [i_manufact_id#24, sum#25] +Keys [1]: [i_manufact_id#24] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#26))] + +(44) CometUnion +Child 0 Input [2]: [i_manufact_id#11, total_sales#27] +Child 1 Input [2]: [i_manufact_id#22, total_sales#28] +Child 2 Input [2]: [i_manufact_id#24, total_sales#29] + +(45) CometHashAggregate +Input [2]: [i_manufact_id#11, total_sales#27] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(total_sales#27)] + +(46) CometExchange +Input [3]: [i_manufact_id#11, sum#30, isEmpty#31] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(47) CometHashAggregate +Input [3]: [i_manufact_id#11, sum#30, isEmpty#31] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(total_sales#27)] + +(48) CometTakeOrderedAndProject +Input [2]: [i_manufact_id#11, total_sales#32] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_sales#32 ASC NULLS FIRST], output=[i_manufact_id#11,total_sales#32]), [i_manufact_id#11, total_sales#32], 100, [total_sales#32 ASC NULLS FIRST], [i_manufact_id#11, total_sales#32] + +(49) ColumnarToRow [codegen id : 1] +Input [2]: [i_manufact_id#11, total_sales#32] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/simplified.txt new file mode 100644 index 0000000000..dfa31808a4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_datafusion/simplified.txt @@ -0,0 +1,51 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_manufact_id,total_sales] + CometHashAggregate [i_manufact_id,total_sales,sum,isEmpty,sum(total_sales)] + CometExchange [i_manufact_id] #1 + CometHashAggregate [i_manufact_id,sum,isEmpty,total_sales] + CometUnion [i_manufact_id,total_sales] + CometHashAggregate [i_manufact_id,total_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_manufact_id] #2 + CometHashAggregate [i_manufact_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_manufact_id] #5 + CometBroadcastHashJoin [i_item_sk,i_manufact_id,i_manufact_id] + CometFilter [i_item_sk,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_manufact_id] + CometBroadcastExchange [i_manufact_id] #6 + CometProject [i_manufact_id] + CometFilter [i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_category,i_manufact_id] + CometHashAggregate [i_manufact_id,total_sales,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_manufact_id] #7 + CometHashAggregate [i_manufact_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_manufact_id] #5 + CometHashAggregate [i_manufact_id,total_sales,sum,sum(UnscaledValue(ws_ext_sales_price))] + ReusedExchange [i_manufact_id,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4f2911e868 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/explain.txt @@ -0,0 +1,344 @@ +== Physical Plan == +* ColumnarToRow (61) ++- CometTakeOrderedAndProject (60) + +- CometHashAggregate (59) + +- CometExchange (58) + +- CometHashAggregate (57) + +- CometUnion (56) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.customer_address (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometScan parquet spark_catalog.default.item (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometScan parquet spark_catalog.default.item (17) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- CometHashAggregate (55) + +- CometExchange (54) + +- CometHashAggregate (53) + +- CometProject (52) + +- CometBroadcastHashJoin (51) + :- CometProject (49) + : +- CometBroadcastHashJoin (48) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometFilter (43) + : : : +- CometScan parquet spark_catalog.default.web_sales (42) + : : +- ReusedExchange (44) + : +- ReusedExchange (47) + +- ReusedExchange (50) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [i_item_sk#10, i_manufact_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometScan parquet spark_catalog.default.item +Output [2]: [i_category#12, i_manufact_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics )] +ReadSchema: struct + +(18) CometFilter +Input [2]: [i_category#12, i_manufact_id#13] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Electronics )) + +(19) CometProject +Input [2]: [i_category#12, i_manufact_id#13] +Arguments: [i_manufact_id#13], [i_manufact_id#13] + +(20) CometBroadcastExchange +Input [1]: [i_manufact_id#13] +Arguments: [i_manufact_id#13] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_manufact_id#11] +Right output [1]: [i_manufact_id#13] +Arguments: [i_manufact_id#11], [i_manufact_id#13], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [i_item_sk#10, i_manufact_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_manufact_id#11] +Arguments: [ss_ext_sales_price#3, i_manufact_id#11], [ss_ext_sales_price#3, i_manufact_id#11] + +(25) CometHashAggregate +Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(26) CometExchange +Input [2]: [i_manufact_id#11, sum#14] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [i_manufact_id#11, sum#14] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] + +(28) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#18)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(29) CometFilter +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_bill_addr_sk#15) AND isnotnull(cs_item_sk#16)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(31) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(32) CometProject +Input [5]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17], [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#20] + +(34) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] +Right output [1]: [ca_address_sk#20] +Arguments: [cs_bill_addr_sk#15], [ca_address_sk#20], Inner, BuildRight + +(35) CometProject +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, ca_address_sk#20] +Arguments: [cs_item_sk#16, cs_ext_sales_price#17], [cs_item_sk#16, cs_ext_sales_price#17] + +(36) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#21, i_manufact_id#22] + +(37) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#16, cs_ext_sales_price#17] +Right output [2]: [i_item_sk#21, i_manufact_id#22] +Arguments: [cs_item_sk#16], [i_item_sk#21], Inner, BuildRight + +(38) CometProject +Input [4]: [cs_item_sk#16, cs_ext_sales_price#17, i_item_sk#21, i_manufact_id#22] +Arguments: [cs_ext_sales_price#17, i_manufact_id#22], [cs_ext_sales_price#17, i_manufact_id#22] + +(39) CometHashAggregate +Input [2]: [cs_ext_sales_price#17, i_manufact_id#22] +Keys [1]: [i_manufact_id#22] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#17))] + +(40) CometExchange +Input [2]: [i_manufact_id#22, sum#23] +Arguments: hashpartitioning(i_manufact_id#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(41) CometHashAggregate +Input [2]: [i_manufact_id#22, sum#23] +Keys [1]: [i_manufact_id#22] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(42) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#27)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(43) CometFilter +Input [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Condition : (isnotnull(ws_bill_addr_sk#25) AND isnotnull(ws_item_sk#24)) + +(44) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#28] + +(45) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Right output [1]: [d_date_sk#28] +Arguments: [ws_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight + +(46) CometProject +Input [5]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27, d_date_sk#28] +Arguments: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26], [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26] + +(47) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#29] + +(48) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26] +Right output [1]: [ca_address_sk#29] +Arguments: [ws_bill_addr_sk#25], [ca_address_sk#29], Inner, BuildRight + +(49) CometProject +Input [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ca_address_sk#29] +Arguments: [ws_item_sk#24, ws_ext_sales_price#26], [ws_item_sk#24, ws_ext_sales_price#26] + +(50) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#30, i_manufact_id#31] + +(51) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#24, ws_ext_sales_price#26] +Right output [2]: [i_item_sk#30, i_manufact_id#31] +Arguments: [ws_item_sk#24], [i_item_sk#30], Inner, BuildRight + +(52) CometProject +Input [4]: [ws_item_sk#24, ws_ext_sales_price#26, i_item_sk#30, i_manufact_id#31] +Arguments: [ws_ext_sales_price#26, i_manufact_id#31], [ws_ext_sales_price#26, i_manufact_id#31] + +(53) CometHashAggregate +Input [2]: [ws_ext_sales_price#26, i_manufact_id#31] +Keys [1]: [i_manufact_id#31] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#26))] + +(54) CometExchange +Input [2]: [i_manufact_id#31, sum#32] +Arguments: hashpartitioning(i_manufact_id#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(55) CometHashAggregate +Input [2]: [i_manufact_id#31, sum#32] +Keys [1]: [i_manufact_id#31] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#26))] + +(56) CometUnion +Child 0 Input [2]: [i_manufact_id#11, total_sales#33] +Child 1 Input [2]: [i_manufact_id#22, total_sales#34] +Child 2 Input [2]: [i_manufact_id#31, total_sales#35] + +(57) CometHashAggregate +Input [2]: [i_manufact_id#11, total_sales#33] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(total_sales#33)] + +(58) CometExchange +Input [3]: [i_manufact_id#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [3]: [i_manufact_id#11, sum#36, isEmpty#37] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(total_sales#33)] + +(60) CometTakeOrderedAndProject +Input [2]: [i_manufact_id#11, total_sales#38] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_sales#38 ASC NULLS FIRST], output=[i_manufact_id#11,total_sales#38]), [i_manufact_id#11, total_sales#38], 100, [total_sales#38 ASC NULLS FIRST], [i_manufact_id#11, total_sales#38] + +(61) ColumnarToRow [codegen id : 1] +Input [2]: [i_manufact_id#11, total_sales#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ec4e84270d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q33.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_manufact_id,total_sales] + CometHashAggregate [i_manufact_id,total_sales,sum,isEmpty,sum(total_sales)] + CometExchange [i_manufact_id] #1 + CometHashAggregate [i_manufact_id,sum,isEmpty,total_sales] + CometUnion [i_manufact_id,total_sales] + CometHashAggregate [i_manufact_id,total_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_manufact_id] #2 + CometHashAggregate [i_manufact_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_manufact_id] #5 + CometBroadcastHashJoin [i_item_sk,i_manufact_id,i_manufact_id] + CometFilter [i_item_sk,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + CometBroadcastExchange [i_manufact_id] #6 + CometProject [i_manufact_id] + CometFilter [i_category,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_category,i_manufact_id] + CometHashAggregate [i_manufact_id,total_sales,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_manufact_id] #7 + CometHashAggregate [i_manufact_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_manufact_id] #5 + CometHashAggregate [i_manufact_id,total_sales,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_manufact_id] #8 + CometHashAggregate [i_manufact_id,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [ws_item_sk,ws_ext_sales_price] + CometBroadcastHashJoin [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ca_address_sk] + CometProject [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + CometBroadcastHashJoin [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/explain.txt new file mode 100644 index 0000000000..4d010531bc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6, d_year#7, d_dom#8] + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9, s_county#10] + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 15) AND (cnt#16 <= 20)) + +(25) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bf05de8c6e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..88961ad066 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.store (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.household_demographics (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometScan parquet spark_catalog.default.customer (25) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometScan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 15) AND (cnt#16 <= 20)) + +(25) CometScan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..5e40d03daf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q34.native_iceberg_compat/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometScan parquet spark_catalog.default.store [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/explain.txt new file mode 100644 index 0000000000..aae420dfb5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * Filter (22) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (21) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (27) + : +- * ColumnarToRow (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#6, ss_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8, d_year#9, d_qoy#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Arguments: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(22) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(23) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#15, ca_state#16] +Arguments: [ca_address_sk#15, ca_state#16] + +(25) CometFilter +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : isnotnull(ca_address_sk#15) + +(26) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#15, ca_state#16] + +(27) BroadcastExchange +Input [2]: [ca_address_sk#15, ca_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, ca_state#16] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#15, ca_state#16] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Arguments: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(31) CometFilter +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Condition : isnotnull(cd_demo_sk#17) + +(32) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(33) BroadcastExchange +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 5] +Output [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Input [8]: [c_current_cdemo_sk#4, ca_state#16, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(36) HashAggregate [codegen id : 5] +Input [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Keys [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#20), partial_max(cd_dep_count#20), partial_avg(cd_dep_count#20), partial_min(cd_dep_employed_count#21), partial_max(cd_dep_employed_count#21), partial_avg(cd_dep_employed_count#21), partial_min(cd_dep_college_count#22), partial_max(cd_dep_college_count#22), partial_avg(cd_dep_college_count#22)] +Aggregate Attributes [13]: [count#23, min#24, max#25, sum#26, count#27, min#28, max#29, sum#30, count#31, min#32, max#33, sum#34, count#35] +Results [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44, min#45, max#46, sum#47, count#48] + +(37) Exchange +Input [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44, min#45, max#46, sum#47, count#48] +Arguments: hashpartitioning(ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 6] +Input [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44, min#45, max#46, sum#47, count#48] +Keys [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Functions [10]: [count(1), min(cd_dep_count#20), max(cd_dep_count#20), avg(cd_dep_count#20), min(cd_dep_employed_count#21), max(cd_dep_employed_count#21), avg(cd_dep_employed_count#21), min(cd_dep_college_count#22), max(cd_dep_college_count#22), avg(cd_dep_college_count#22)] +Aggregate Attributes [10]: [count(1)#49, min(cd_dep_count#20)#50, max(cd_dep_count#20)#51, avg(cd_dep_count#20)#52, min(cd_dep_employed_count#21)#53, max(cd_dep_employed_count#21)#54, avg(cd_dep_employed_count#21)#55, min(cd_dep_college_count#22)#56, max(cd_dep_college_count#22)#57, avg(cd_dep_college_count#22)#58] +Results [18]: [ca_state#16, cd_gender#18, cd_marital_status#19, count(1)#49 AS cnt1#59, min(cd_dep_count#20)#50 AS min(cd_dep_count)#60, max(cd_dep_count#20)#51 AS max(cd_dep_count)#61, avg(cd_dep_count#20)#52 AS avg(cd_dep_count)#62, cd_dep_employed_count#21, count(1)#49 AS cnt2#63, min(cd_dep_employed_count#21)#53 AS min(cd_dep_employed_count)#64, max(cd_dep_employed_count#21)#54 AS max(cd_dep_employed_count)#65, avg(cd_dep_employed_count#21)#55 AS avg(cd_dep_employed_count)#66, cd_dep_college_count#22, count(1)#49 AS cnt3#67, min(cd_dep_college_count#22)#56 AS min(cd_dep_college_count)#68, max(cd_dep_college_count#22)#57 AS max(cd_dep_college_count)#69, avg(cd_dep_college_count#22)#58 AS avg(cd_dep_college_count)#70, cd_dep_count#20] + +(39) TakeOrderedAndProject +Input [18]: [ca_state#16, cd_gender#18, cd_marital_status#19, cnt1#59, min(cd_dep_count)#60, max(cd_dep_count)#61, avg(cd_dep_count)#62, cd_dep_employed_count#21, cnt2#63, min(cd_dep_employed_count)#64, max(cd_dep_employed_count)#65, avg(cd_dep_employed_count)#66, cd_dep_college_count#22, cnt3#67, min(cd_dep_college_count)#68, max(cd_dep_college_count)#69, avg(cd_dep_college_count)#70, cd_dep_count#20] +Arguments: 100, [ca_state#16 ASC NULLS FIRST, cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_dep_count#20 ASC NULLS FIRST, cd_dep_employed_count#21 ASC NULLS FIRST, cd_dep_college_count#22 ASC NULLS FIRST], [ca_state#16, cd_gender#18, cd_marital_status#19, cnt1#59, min(cd_dep_count)#60, max(cd_dep_count)#61, avg(cd_dep_count)#62, cd_dep_employed_count#21, cnt2#63, min(cd_dep_employed_count)#64, max(cd_dep_employed_count)#65, avg(cd_dep_employed_count)#66, cd_dep_college_count#22, cnt3#67, min(cd_dep_college_count)#68, max(cd_dep_college_count)#69, avg(cd_dep_college_count)#70] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6767df5cc1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_datafusion/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count)] + WholeStageCodegen (6) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] [count(1),min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,min,max,sum,count,min,max,sum,count,min,max,sum,count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..31c580d314 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/explain.txt @@ -0,0 +1,255 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (34) + : +- * BroadcastHashJoin Inner BuildRight (33) + : :- * Project (28) + : : +- * Filter (27) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (26) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometScan parquet spark_catalog.default.store_sales (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometScan parquet spark_catalog.default.web_sales (13) + : : : +- ReusedExchange (14) + : : +- BroadcastExchange (25) + : : +- * ColumnarToRow (24) + : : +- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometScan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (21) + : +- BroadcastExchange (32) + : +- * ColumnarToRow (31) + : +- CometFilter (30) + : +- CometScan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (38) + +- * ColumnarToRow (37) + +- CometFilter (36) + +- CometScan parquet spark_catalog.default.customer_demographics (35) + + +(1) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(21) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#16] + +(22) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + +(23) CometProject +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [cs_ship_customer_sk#14], [cs_ship_customer_sk#14] + +(24) ColumnarToRow [codegen id : 2] +Input [1]: [cs_ship_customer_sk#14] + +(25) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(27) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(28) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(29) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(31) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#17, ca_state#18] + +(32) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, ca_state#18] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17, ca_state#18] + +(35) CometScan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(36) CometFilter +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(37) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(38) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(39) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 5] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#4, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(41) HashAggregate [codegen id : 5] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#22), partial_max(cd_dep_count#22), partial_avg(cd_dep_count#22), partial_min(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_avg(cd_dep_employed_count#23), partial_min(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_avg(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, min#26, max#27, sum#28, count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] + +(42) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(43) HashAggregate [codegen id : 6] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), min(cd_dep_count#22), max(cd_dep_count#22), avg(cd_dep_count#22), min(cd_dep_employed_count#23), max(cd_dep_employed_count#23), avg(cd_dep_employed_count#23), min(cd_dep_college_count#24), max(cd_dep_college_count#24), avg(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, min(cd_dep_count#22)#52, max(cd_dep_count#22)#53, avg(cd_dep_count#22)#54, min(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, avg(cd_dep_employed_count#23)#57, min(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, avg(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, count(1)#51 AS cnt1#61, min(cd_dep_count#22)#52 AS min(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, avg(cd_dep_count#22)#54 AS avg(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, min(cd_dep_employed_count#23)#55 AS min(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, avg(cd_dep_employed_count#23)#57 AS avg(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, min(cd_dep_college_count#24)#58 AS min(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, avg(cd_dep_college_count#24)#60 AS avg(cd_dep_college_count)#72, cd_dep_count#22] + +(44) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cnt1#61, min(cd_dep_count)#62, max(cd_dep_count)#63, avg(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, min(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, avg(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, min(cd_dep_college_count)#70, max(cd_dep_college_count)#71, avg(cd_dep_college_count)#72, cd_dep_count#22] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cnt1#61, min(cd_dep_count)#62, max(cd_dep_count)#63, avg(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, min(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, avg(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, min(cd_dep_college_count)#70, max(cd_dep_college_count)#71, avg(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ea875666a7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q35.native_iceberg_compat/simplified.txt @@ -0,0 +1,60 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count)] + WholeStageCodegen (6) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] [count(1),min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,min,max,sum,count,min,max,sum,count,min,max,sum,count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometProject [cs_ship_customer_sk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/explain.txt new file mode 100644 index 0000000000..ce8b189c76 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- Window (27) + +- * ColumnarToRow (26) + +- CometSort (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometExchange (22) + +- CometHashAggregate (21) + +- CometExpand (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6, d_year#7] + +(4) CometFilter +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4], [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(10) CometFilter +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(12) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Right output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_item_sk#1], [i_item_sk#8], Inner, BuildRight + +(13) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11, s_state#12] + +(15) CometFilter +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(16) CometProject +Input [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(18) CometBroadcastHashJoin +Left output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(19) CometProject +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] +Arguments: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] + +(20) CometExpand +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] + +(21) CometHashAggregate +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(22) CometExchange +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#16, sum#17] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#16, sum#17] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] + +(24) CometExchange +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] +Arguments: hashpartitioning(_w1#21, _w2#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(25) CometSort +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] +Arguments: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22], [_w1#21 ASC NULLS FIRST, _w2#22 ASC NULLS FIRST, _w0#20 ASC NULLS FIRST] + +(26) ColumnarToRow [codegen id : 1] +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] + +(27) Window +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] +Arguments: [rank(_w0#20) windowspecdefinition(_w1#21, _w2#22, _w0#20 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#23], [_w1#21, _w2#22], [_w0#20 ASC NULLS FIRST] + +(28) Project [codegen id : 2] +Output [5]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, rank_within_parent#23] +Input [8]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22, rank_within_parent#23] + +(29) TakeOrderedAndProject +Input [5]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, rank_within_parent#23] +Arguments: 100, [lochierarchy#19 DESC NULLS LAST, CASE WHEN (lochierarchy#19 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#23 ASC NULLS FIRST], [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, rank_within_parent#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8ceb83677c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_datafusion/simplified.txt @@ -0,0 +1,33 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (2) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [gross_margin,i_category,i_class,lochierarchy,_w0,_w1,_w2] + CometExchange [_w1,_w2] #1 + CometHashAggregate [gross_margin,i_category,i_class,lochierarchy,_w0,_w1,_w2,spark_grouping_id,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_category,i_class,spark_grouping_id] #2 + CometHashAggregate [i_category,i_class,spark_grouping_id,sum,sum,ss_net_profit,ss_ext_sales_price] + CometExpand [i_category,i_class] [ss_ext_sales_price,ss_net_profit,i_category,i_class,spark_grouping_id] + CometProject [ss_ext_sales_price,ss_net_profit,i_category,i_class] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,i_item_sk,i_class,i_category] + CometProject [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_class,i_category] #4 + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] + CometBroadcastExchange [s_store_sk] #5 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b75b61579c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/explain.txt @@ -0,0 +1,165 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- Window (27) + +- * ColumnarToRow (26) + +- CometSort (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometExchange (22) + +- CometHashAggregate (21) + +- CometExpand (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.item (9) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometScan parquet spark_catalog.default.store (14) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4], [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(9) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(12) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Right output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_item_sk#1], [i_item_sk#8], Inner, BuildRight + +(13) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(14) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_state#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(16) CometProject +Input [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(18) CometBroadcastHashJoin +Left output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(19) CometProject +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] +Arguments: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] + +(20) CometExpand +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] + +(21) CometHashAggregate +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(22) CometExchange +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#16, sum#17] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#16, sum#17] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] + +(24) CometExchange +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] +Arguments: hashpartitioning(_w1#21, _w2#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(25) CometSort +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] +Arguments: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22], [_w1#21 ASC NULLS FIRST, _w2#22 ASC NULLS FIRST, _w0#20 ASC NULLS FIRST] + +(26) ColumnarToRow [codegen id : 1] +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] + +(27) Window +Input [7]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22] +Arguments: [rank(_w0#20) windowspecdefinition(_w1#21, _w2#22, _w0#20 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#23], [_w1#21, _w2#22], [_w0#20 ASC NULLS FIRST] + +(28) Project [codegen id : 2] +Output [5]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, rank_within_parent#23] +Input [8]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, _w0#20, _w1#21, _w2#22, rank_within_parent#23] + +(29) TakeOrderedAndProject +Input [5]: [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, rank_within_parent#23] +Arguments: 100, [lochierarchy#19 DESC NULLS LAST, CASE WHEN (lochierarchy#19 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#23 ASC NULLS FIRST], [gross_margin#18, i_category#13, i_class#14, lochierarchy#19, rank_within_parent#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..893cffade2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q36.native_iceberg_compat/simplified.txt @@ -0,0 +1,33 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (2) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [gross_margin,i_category,i_class,lochierarchy,_w0,_w1,_w2] + CometExchange [_w1,_w2] #1 + CometHashAggregate [gross_margin,i_category,i_class,lochierarchy,_w0,_w1,_w2,spark_grouping_id,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_category,i_class,spark_grouping_id] #2 + CometHashAggregate [i_category,i_class,spark_grouping_id,sum,sum,ss_net_profit,ss_ext_sales_price] + CometExpand [i_category,i_class] [ss_ext_sales_price,ss_net_profit,i_category,i_class,spark_grouping_id] + CometProject [ss_ext_sales_price,ss_net_profit,i_category,i_class] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,i_item_sk,i_class,i_category] + CometProject [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_class,i_category] #4 + CometFilter [i_item_sk,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + CometBroadcastExchange [s_store_sk] #5 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/explain.txt new file mode 100644 index 0000000000..bc7fc8f71a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +* ColumnarToRow (26) ++- CometTakeOrderedAndProject (25) + +- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + +- CometProject (19) + +- CometFilter (18) + +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (17) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (i_current_price#4 <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(4) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(5) CometFilter +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(6) CometProject +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8], [inv_item_sk#6, inv_date_sk#8] + +(7) CometBroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1], [inv_item_sk#6], Inner, BuildRight + +(9) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(11) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-02-01)) AND (d_date#10 <= 2000-04-01)) AND isnotnull(d_date_sk#9)) + +(12) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(14) CometBroadcastHashJoin +Left output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [inv_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(15) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(16) CometBroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(17) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Arguments: [cs_item_sk#11, cs_sold_date_sk#12] + +(18) CometFilter +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) + +(19) CometProject +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Arguments: [cs_item_sk#11], [cs_item_sk#11] + +(20) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [1]: [cs_item_sk#11] +Arguments: [i_item_sk#1], [cs_item_sk#11], Inner, BuildLeft + +(21) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] +Arguments: [i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(22) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(23) CometExchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(25) CometTakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#2 ASC NULLS FIRST], output=[i_item_id#2,i_item_desc#3,i_current_price#4]), [i_item_id#2, i_item_desc#3, i_current_price#4], 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2a8cf3e62b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometExchange [i_item_id,i_item_desc,i_current_price] #1 + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometProject [i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,cs_item_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price] #2 + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk,d_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_item_sk,inv_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometBroadcastExchange [inv_item_sk,inv_date_sk] #3 + CometProject [inv_item_sk,inv_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..af1a1c3283 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/explain.txt @@ -0,0 +1,150 @@ +== Physical Plan == +* ColumnarToRow (26) ++- CometTakeOrderedAndProject (25) + +- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.item (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometScan parquet spark_catalog.default.inventory (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometScan parquet spark_catalog.default.date_dim (10) + +- CometProject (19) + +- CometFilter (18) + +- CometScan parquet spark_catalog.default.catalog_sales (17) + + +(1) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), LessThanOrEqual(i_current_price,98.00), In(i_manufact_id, [677,694,808,940]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (i_current_price#4 <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(4) CometScan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#8)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(6) CometProject +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8], [inv_item_sk#6, inv_date_sk#8] + +(7) CometBroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1], [inv_item_sk#6], Inner, BuildRight + +(9) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] + +(10) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-02-01)) AND (d_date#10 <= 2000-04-01)) AND isnotnull(d_date_sk#9)) + +(12) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(14) CometBroadcastHashJoin +Left output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [inv_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(15) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(16) CometBroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(17) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(18) CometFilter +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) + +(19) CometProject +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Arguments: [cs_item_sk#11], [cs_item_sk#11] + +(20) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [1]: [cs_item_sk#11] +Arguments: [i_item_sk#1], [cs_item_sk#11], Inner, BuildLeft + +(21) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] +Arguments: [i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(22) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(23) CometExchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(25) CometTakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#2 ASC NULLS FIRST], output=[i_item_id#2,i_item_desc#3,i_current_price#4]), [i_item_id#2, i_item_desc#3, i_current_price#4], 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c31aa1e813 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q37.native_iceberg_compat/simplified.txt @@ -0,0 +1,28 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometExchange [i_item_id,i_item_desc,i_current_price] #1 + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometProject [i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,cs_item_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price] #2 + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk,d_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_item_sk,inv_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometBroadcastExchange [inv_item_sk,inv_date_sk] #3 + CometProject [inv_item_sk,inv_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/explain.txt new file mode 100644 index 0000000000..dab0b8c9ad --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/explain.txt @@ -0,0 +1,143 @@ +== Physical Plan == +* ColumnarToRow (27) ++- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometBroadcastHashJoin (20) + : :- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : +- CometBroadcastExchange (19) + : +- CometHashAggregate (18) + : +- ReusedExchange (17) + +- ReusedExchange (21) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Arguments: [ss_customer_sk#1, ss_sold_date_sk#2] + +(2) CometFilter +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4, d_month_seq#5] + +(4) CometFilter +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(5) CometProject +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4], [d_date_sk#3, d_date#4] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: [d_date_sk#3, d_date#4] + +(7) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Right output [2]: [d_date_sk#3, d_date#4] +Arguments: [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] +Arguments: [ss_customer_sk#1, d_date#4], [ss_customer_sk#1, d_date#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(10) CometFilter +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(11) CometBroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, d_date#4] +Right output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [ss_customer_sk#1], [c_customer_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_last_name#8, c_first_name#7, d_date#4], [c_last_name#8, c_first_name#7, d_date#4] + +(14) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(15) CometExchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(17) ReusedExchange [Reuses operator id: 15] +Output [3]: [c_last_name#9, c_first_name#10, d_date#11] + +(18) CometHashAggregate +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Keys [3]: [c_last_name#9, c_first_name#10, d_date#11] +Functions: [] + +(19) CometBroadcastExchange +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Arguments: [c_last_name#9, c_first_name#10, d_date#11] + +(20) CometBroadcastHashJoin +Left output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Right output [3]: [c_last_name#9, c_first_name#10, d_date#11] +Arguments: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)], [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#10, ), isnull(c_first_name#10), coalesce(d_date#11, 1970-01-01), isnull(d_date#11)], LeftSemi, BuildRight + +(21) ReusedExchange [Reuses operator id: 19] +Output [3]: [c_last_name#12, c_first_name#13, d_date#14] + +(22) CometBroadcastHashJoin +Left output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Right output [3]: [c_last_name#12, c_first_name#13, d_date#14] +Arguments: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)], [coalesce(c_last_name#12, ), isnull(c_last_name#12), coalesce(c_first_name#13, ), isnull(c_first_name#13), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)], LeftSemi, BuildRight + +(23) CometProject +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(24) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(25) CometExchange +Input [1]: [count#15] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(26) CometHashAggregate +Input [1]: [count#15] +Keys: [] +Functions [1]: [count(1)] + +(27) ColumnarToRow [codegen id : 1] +Input [1]: [count(1)#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c2697a2c1c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_datafusion/simplified.txt @@ -0,0 +1,29 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [count(1),count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #2 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ss_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ss_customer_sk,d_date] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_customer_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #3 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] + CometBroadcastExchange [c_last_name,c_first_name,d_date] #5 + CometHashAggregate [c_last_name,c_first_name,d_date] + ReusedExchange [c_last_name,c_first_name,d_date] #2 + ReusedExchange [c_last_name,c_first_name,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ecf09535b4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometHashAggregate (46) + +- CometExchange (45) + +- CometHashAggregate (44) + +- CometProject (43) + +- CometBroadcastHashJoin (42) + :- CometBroadcastHashJoin (29) + : :- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.customer (9) + : +- CometBroadcastExchange (28) + : +- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (21) + : : +- CometBroadcastHashJoin (20) + : : :- CometFilter (18) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (17) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- CometBroadcastExchange (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (34) + : +- CometBroadcastHashJoin (33) + : :- CometFilter (31) + : : +- CometScan parquet spark_catalog.default.web_sales (30) + : +- ReusedExchange (32) + +- ReusedExchange (35) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#2)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(5) CometProject +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4], [d_date_sk#3, d_date#4] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: [d_date_sk#3, d_date#4] + +(7) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Right output [2]: [d_date_sk#3, d_date#4] +Arguments: [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] +Arguments: [ss_customer_sk#1, d_date#4], [ss_customer_sk#1, d_date#4] + +(9) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(11) CometBroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, d_date#4] +Right output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [ss_customer_sk#1], [c_customer_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_last_name#8, c_first_name#7, d_date#4], [c_last_name#8, c_first_name#7, d_date#4] + +(14) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(15) CometExchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(17) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(18) CometFilter +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) + +(19) ReusedExchange [Reuses operator id: 6] +Output [2]: [d_date_sk#11, d_date#12] + +(20) CometBroadcastHashJoin +Left output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Right output [2]: [d_date_sk#11, d_date#12] +Arguments: [cs_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(21) CometProject +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] +Arguments: [cs_bill_customer_sk#9, d_date#12], [cs_bill_customer_sk#9, d_date#12] + +(22) ReusedExchange [Reuses operator id: 11] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] + +(23) CometBroadcastHashJoin +Left output [2]: [cs_bill_customer_sk#9, d_date#12] +Right output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] +Arguments: [cs_bill_customer_sk#9], [c_customer_sk#13], Inner, BuildRight + +(24) CometProject +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] +Arguments: [c_last_name#15, c_first_name#14, d_date#12], [c_last_name#15, c_first_name#14, d_date#12] + +(25) CometHashAggregate +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] + +(26) CometExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(27) CometHashAggregate +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] + +(28) CometBroadcastExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: [c_last_name#15, c_first_name#14, d_date#12] + +(29) CometBroadcastHashJoin +Left output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Right output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)], [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)], LeftSemi, BuildRight + +(30) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#17)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(31) CometFilter +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) + +(32) ReusedExchange [Reuses operator id: 6] +Output [2]: [d_date_sk#18, d_date#19] + +(33) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Right output [2]: [d_date_sk#18, d_date#19] +Arguments: [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + +(34) CometProject +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] +Arguments: [ws_bill_customer_sk#16, d_date#19], [ws_bill_customer_sk#16, d_date#19] + +(35) ReusedExchange [Reuses operator id: 11] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#16, d_date#19] +Right output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [ws_bill_customer_sk#16], [c_customer_sk#20], Inner, BuildRight + +(37) CometProject +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_last_name#22, c_first_name#21, d_date#19], [c_last_name#22, c_first_name#21, d_date#19] + +(38) CometHashAggregate +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] + +(39) CometExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(40) CometHashAggregate +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] + +(41) CometBroadcastExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: [c_last_name#22, c_first_name#21, d_date#19] + +(42) CometBroadcastHashJoin +Left output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Right output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)], [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)], LeftSemi, BuildRight + +(43) CometProject +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(44) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(45) CometExchange +Input [1]: [count#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(46) CometHashAggregate +Input [1]: [count#23] +Keys: [] +Functions [1]: [count(1)] + +(47) ColumnarToRow [codegen id : 1] +Input [1]: [count(1)#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..43c9ff4aa4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q38.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [count(1),count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #2 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ss_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ss_customer_sk,d_date] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_customer_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #3 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + CometBroadcastExchange [c_last_name,c_first_name,d_date] #5 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #6 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [cs_bill_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [cs_bill_customer_sk,d_date] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_sold_date_sk,d_date_sk,d_date] + CometFilter [cs_bill_customer_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk,d_date] #3 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometBroadcastExchange [c_last_name,c_first_name,d_date] #7 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #8 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ws_bill_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ws_bill_customer_sk,d_date] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_bill_customer_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_date] #3 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/explain.txt new file mode 100644 index 0000000000..f1914bd440 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/explain.txt @@ -0,0 +1,246 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometSort (46) + +- CometColumnarExchange (45) + +- CometBroadcastHashJoin (44) + :- CometProject (23) + : +- CometFilter (22) + : +- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- CometBroadcastExchange (43) + +- CometProject (42) + +- CometFilter (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (31) + : +- CometBroadcastHashJoin (30) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometFilter (25) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (24) + : : +- ReusedExchange (26) + : +- ReusedExchange (29) + +- CometBroadcastExchange (35) + +- CometProject (34) + +- CometFilter (33) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (32) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(4) CometFilter +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [i_item_sk#5] +Arguments: [inv_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5], [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(9) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(11) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#6], Inner, BuildRight + +(12) CometProject +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7], [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(14) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10], [d_date_sk#8, d_moy#10] + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10] + +(17) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Right output [2]: [d_date_sk#8, d_moy#10] +Arguments: [inv_date_sk#4], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] +Arguments: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10], [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(19) CometHashAggregate +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] + +(20) CometExchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] + +(22) CometFilter +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END + +(23) CometProject +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18], [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#18] + +(24) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Arguments: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] + +(25) CometFilter +Input [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Condition : (isnotnull(inv_item_sk#19) AND isnotnull(inv_warehouse_sk#20)) + +(26) ReusedExchange [Reuses operator id: 5] +Output [1]: [i_item_sk#23] + +(27) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Right output [1]: [i_item_sk#23] +Arguments: [inv_item_sk#19], [i_item_sk#23], Inner, BuildRight + +(28) CometProject +Input [5]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Arguments: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23], [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] + +(29) ReusedExchange [Reuses operator id: 10] +Output [2]: [w_warehouse_sk#24, w_warehouse_name#25] + +(30) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Right output [2]: [w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_warehouse_sk#20], [w_warehouse_sk#24], Inner, BuildRight + +(31) CometProject +Input [6]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25], [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] + +(32) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Arguments: [d_date_sk#26, d_year#27, d_moy#28] + +(33) CometFilter +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : ((((isnotnull(d_year#27) AND isnotnull(d_moy#28)) AND (d_year#27 = 2001)) AND (d_moy#28 = 2)) AND isnotnull(d_date_sk#26)) + +(34) CometProject +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28], [d_date_sk#26, d_moy#28] + +(35) CometBroadcastExchange +Input [2]: [d_date_sk#26, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28] + +(36) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Right output [2]: [d_date_sk#26, d_moy#28] +Arguments: [inv_date_sk#22], [d_date_sk#26], Inner, BuildRight + +(37) CometProject +Input [7]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_date_sk#26, d_moy#28] +Arguments: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28], [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] + +(38) CometHashAggregate +Input [5]: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#21 as double)), partial_avg(inv_quantity_on_hand#21)] + +(39) CometExchange +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Arguments: hashpartitioning(w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(40) CometHashAggregate +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#21 as double)), avg(inv_quantity_on_hand#21)] + +(41) CometFilter +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END + +(42) CometProject +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#17 AS mean#34, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#35] + +(43) CometBroadcastExchange +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + +(44) CometBroadcastHashJoin +Left output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18] +Right output [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [i_item_sk#5, w_warehouse_sk#6], [i_item_sk#23, w_warehouse_sk#24], Inner, BuildRight + +(45) CometColumnarExchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(46) CometSort +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST] + +(47) ColumnarToRow [codegen id : 1] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..39d6ad8918 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometColumnarExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + CometBroadcastHashJoin [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometProject [stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [i_item_sk] #3 + CometFilter [i_item_sk] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [d_date_sk,d_moy] #5 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov] #6 + CometProject [mean,stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + ReusedExchange [i_item_sk] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + CometBroadcastExchange [d_date_sk,d_moy] #8 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9a05531d17 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometSort (46) + +- CometColumnarExchange (45) + +- CometBroadcastHashJoin (44) + :- CometProject (23) + : +- CometFilter (22) + : +- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.inventory (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.item (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.warehouse (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.date_dim (13) + +- CometBroadcastExchange (43) + +- CometProject (42) + +- CometFilter (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (31) + : +- CometBroadcastHashJoin (30) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometFilter (25) + : : : +- CometScan parquet spark_catalog.default.inventory (24) + : : +- ReusedExchange (26) + : +- ReusedExchange (29) + +- CometBroadcastExchange (35) + +- CometProject (34) + +- CometFilter (33) + +- CometScan parquet spark_catalog.default.date_dim (32) + + +(1) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometScan parquet spark_catalog.default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [i_item_sk#5] +Arguments: [inv_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5], [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(8) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(11) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#6], Inner, BuildRight + +(12) CometProject +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7], [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10], [d_date_sk#8, d_moy#10] + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10] + +(17) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Right output [2]: [d_date_sk#8, d_moy#10] +Arguments: [inv_date_sk#4], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] +Arguments: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10], [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(19) CometHashAggregate +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] + +(20) CometExchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] + +(22) CometFilter +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END + +(23) CometProject +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18], [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#18] + +(24) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#22)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(25) CometFilter +Input [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Condition : (isnotnull(inv_item_sk#19) AND isnotnull(inv_warehouse_sk#20)) + +(26) ReusedExchange [Reuses operator id: 5] +Output [1]: [i_item_sk#23] + +(27) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Right output [1]: [i_item_sk#23] +Arguments: [inv_item_sk#19], [i_item_sk#23], Inner, BuildRight + +(28) CometProject +Input [5]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Arguments: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23], [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] + +(29) ReusedExchange [Reuses operator id: 10] +Output [2]: [w_warehouse_sk#24, w_warehouse_name#25] + +(30) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Right output [2]: [w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_warehouse_sk#20], [w_warehouse_sk#24], Inner, BuildRight + +(31) CometProject +Input [6]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25], [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] + +(32) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(33) CometFilter +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : ((((isnotnull(d_year#27) AND isnotnull(d_moy#28)) AND (d_year#27 = 2001)) AND (d_moy#28 = 2)) AND isnotnull(d_date_sk#26)) + +(34) CometProject +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28], [d_date_sk#26, d_moy#28] + +(35) CometBroadcastExchange +Input [2]: [d_date_sk#26, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28] + +(36) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Right output [2]: [d_date_sk#26, d_moy#28] +Arguments: [inv_date_sk#22], [d_date_sk#26], Inner, BuildRight + +(37) CometProject +Input [7]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_date_sk#26, d_moy#28] +Arguments: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28], [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] + +(38) CometHashAggregate +Input [5]: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#21 as double)), partial_avg(inv_quantity_on_hand#21)] + +(39) CometExchange +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Arguments: hashpartitioning(w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(40) CometHashAggregate +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#21 as double)), avg(inv_quantity_on_hand#21)] + +(41) CometFilter +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END + +(42) CometProject +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#17 AS mean#34, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#35] + +(43) CometBroadcastExchange +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + +(44) CometBroadcastHashJoin +Left output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18] +Right output [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [i_item_sk#5, w_warehouse_sk#6], [i_item_sk#23, w_warehouse_sk#24], Inner, BuildRight + +(45) CometColumnarExchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(46) CometSort +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST] + +(47) ColumnarToRow [codegen id : 1] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a64be6d73e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39a.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometColumnarExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + CometBroadcastHashJoin [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometProject [stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [i_item_sk] #3 + CometFilter [i_item_sk] + CometScan parquet spark_catalog.default.item [i_item_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [d_date_sk,d_moy] #5 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov] #6 + CometProject [mean,stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + ReusedExchange [i_item_sk] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + CometBroadcastExchange [d_date_sk,d_moy] #8 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/explain.txt new file mode 100644 index 0000000000..90780b4060 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/explain.txt @@ -0,0 +1,246 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometSort (46) + +- CometColumnarExchange (45) + +- CometBroadcastHashJoin (44) + :- CometProject (23) + : +- CometFilter (22) + : +- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- CometBroadcastExchange (43) + +- CometProject (42) + +- CometFilter (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (31) + : +- CometBroadcastHashJoin (30) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometFilter (25) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (24) + : : +- ReusedExchange (26) + : +- ReusedExchange (29) + +- CometBroadcastExchange (35) + +- CometProject (34) + +- CometFilter (33) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (32) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(4) CometFilter +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [i_item_sk#5] +Arguments: [inv_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5], [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(9) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(11) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#6], Inner, BuildRight + +(12) CometProject +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7], [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(14) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10], [d_date_sk#8, d_moy#10] + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10] + +(17) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Right output [2]: [d_date_sk#8, d_moy#10] +Arguments: [inv_date_sk#4], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] +Arguments: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10], [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(19) CometHashAggregate +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] + +(20) CometExchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] + +(22) CometFilter +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Condition : (CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END AND CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.5) END) + +(23) CometProject +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18], [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#18] + +(24) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Arguments: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] + +(25) CometFilter +Input [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Condition : (isnotnull(inv_item_sk#19) AND isnotnull(inv_warehouse_sk#20)) + +(26) ReusedExchange [Reuses operator id: 5] +Output [1]: [i_item_sk#23] + +(27) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Right output [1]: [i_item_sk#23] +Arguments: [inv_item_sk#19], [i_item_sk#23], Inner, BuildRight + +(28) CometProject +Input [5]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Arguments: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23], [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] + +(29) ReusedExchange [Reuses operator id: 10] +Output [2]: [w_warehouse_sk#24, w_warehouse_name#25] + +(30) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Right output [2]: [w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_warehouse_sk#20], [w_warehouse_sk#24], Inner, BuildRight + +(31) CometProject +Input [6]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25], [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] + +(32) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Arguments: [d_date_sk#26, d_year#27, d_moy#28] + +(33) CometFilter +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : ((((isnotnull(d_year#27) AND isnotnull(d_moy#28)) AND (d_year#27 = 2001)) AND (d_moy#28 = 2)) AND isnotnull(d_date_sk#26)) + +(34) CometProject +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28], [d_date_sk#26, d_moy#28] + +(35) CometBroadcastExchange +Input [2]: [d_date_sk#26, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28] + +(36) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Right output [2]: [d_date_sk#26, d_moy#28] +Arguments: [inv_date_sk#22], [d_date_sk#26], Inner, BuildRight + +(37) CometProject +Input [7]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_date_sk#26, d_moy#28] +Arguments: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28], [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] + +(38) CometHashAggregate +Input [5]: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#21 as double)), partial_avg(inv_quantity_on_hand#21)] + +(39) CometExchange +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Arguments: hashpartitioning(w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(40) CometHashAggregate +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#21 as double)), avg(inv_quantity_on_hand#21)] + +(41) CometFilter +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END + +(42) CometProject +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#17 AS mean#34, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#35] + +(43) CometBroadcastExchange +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + +(44) CometBroadcastHashJoin +Left output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18] +Right output [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [i_item_sk#5, w_warehouse_sk#6], [i_item_sk#23, w_warehouse_sk#24], Inner, BuildRight + +(45) CometColumnarExchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(46) CometSort +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST] + +(47) ColumnarToRow [codegen id : 1] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..39d6ad8918 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometColumnarExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + CometBroadcastHashJoin [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometProject [stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [i_item_sk] #3 + CometFilter [i_item_sk] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [d_date_sk,d_moy] #5 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov] #6 + CometProject [mean,stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + ReusedExchange [i_item_sk] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + CometBroadcastExchange [d_date_sk,d_moy] #8 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f558157e65 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometSort (46) + +- CometColumnarExchange (45) + +- CometBroadcastHashJoin (44) + :- CometProject (23) + : +- CometFilter (22) + : +- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.inventory (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.item (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.warehouse (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.date_dim (13) + +- CometBroadcastExchange (43) + +- CometProject (42) + +- CometFilter (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (31) + : +- CometBroadcastHashJoin (30) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometFilter (25) + : : : +- CometScan parquet spark_catalog.default.inventory (24) + : : +- ReusedExchange (26) + : +- ReusedExchange (29) + +- CometBroadcastExchange (35) + +- CometProject (34) + +- CometFilter (33) + +- CometScan parquet spark_catalog.default.date_dim (32) + + +(1) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometScan parquet spark_catalog.default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [i_item_sk#5] +Arguments: [inv_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5], [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(8) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(11) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#6], Inner, BuildRight + +(12) CometProject +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7], [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10], [d_date_sk#8, d_moy#10] + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10] + +(17) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Right output [2]: [d_date_sk#8, d_moy#10] +Arguments: [inv_date_sk#4], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] +Arguments: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10], [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(19) CometHashAggregate +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] + +(20) CometExchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#11, avg#12, m2#13, sum#14, count#15] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] + +(22) CometFilter +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Condition : (CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END AND CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.5) END) + +(23) CometProject +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#16, mean#17] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18], [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#18] + +(24) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#22)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(25) CometFilter +Input [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Condition : (isnotnull(inv_item_sk#19) AND isnotnull(inv_warehouse_sk#20)) + +(26) ReusedExchange [Reuses operator id: 5] +Output [1]: [i_item_sk#23] + +(27) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22] +Right output [1]: [i_item_sk#23] +Arguments: [inv_item_sk#19], [i_item_sk#23], Inner, BuildRight + +(28) CometProject +Input [5]: [inv_item_sk#19, inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Arguments: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23], [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] + +(29) ReusedExchange [Reuses operator id: 10] +Output [2]: [w_warehouse_sk#24, w_warehouse_name#25] + +(30) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23] +Right output [2]: [w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_warehouse_sk#20], [w_warehouse_sk#24], Inner, BuildRight + +(31) CometProject +Input [6]: [inv_warehouse_sk#20, inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Arguments: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25], [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] + +(32) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(33) CometFilter +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : ((((isnotnull(d_year#27) AND isnotnull(d_moy#28)) AND (d_year#27 = 2001)) AND (d_moy#28 = 2)) AND isnotnull(d_date_sk#26)) + +(34) CometProject +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28], [d_date_sk#26, d_moy#28] + +(35) CometBroadcastExchange +Input [2]: [d_date_sk#26, d_moy#28] +Arguments: [d_date_sk#26, d_moy#28] + +(36) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25] +Right output [2]: [d_date_sk#26, d_moy#28] +Arguments: [inv_date_sk#22], [d_date_sk#26], Inner, BuildRight + +(37) CometProject +Input [7]: [inv_quantity_on_hand#21, inv_date_sk#22, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_date_sk#26, d_moy#28] +Arguments: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28], [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] + +(38) CometHashAggregate +Input [5]: [inv_quantity_on_hand#21, i_item_sk#23, w_warehouse_sk#24, w_warehouse_name#25, d_moy#28] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#21 as double)), partial_avg(inv_quantity_on_hand#21)] + +(39) CometExchange +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Arguments: hashpartitioning(w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(40) CometHashAggregate +Input [9]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28, n#29, avg#30, m2#31, sum#32, count#33] +Keys [4]: [w_warehouse_name#25, w_warehouse_sk#24, i_item_sk#23, d_moy#28] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#21 as double)), avg(inv_quantity_on_hand#21)] + +(41) CometFilter +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))))) > 1.0) END + +(42) CometProject +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, stdev#16, mean#17] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#17 AS mean#34, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#17)) = 0.0) THEN null ELSE (stdev#16 / knownfloatingpointnormalized(normalizenanandzero(mean#17))) END AS cov#35] + +(43) CometBroadcastExchange +Input [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + +(44) CometBroadcastHashJoin +Left output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18] +Right output [5]: [w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [i_item_sk#5, w_warehouse_sk#6], [i_item_sk#23, w_warehouse_sk#24], Inner, BuildRight + +(45) CometColumnarExchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(46) CometSort +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35], [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#17 ASC NULLS FIRST, cov#18 ASC NULLS FIRST, d_moy#28 ASC NULLS FIRST, mean#34 ASC NULLS FIRST, cov#35 ASC NULLS FIRST] + +(47) ColumnarToRow [codegen id : 1] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#17, cov#18, w_warehouse_sk#24, i_item_sk#23, d_moy#28, mean#34, cov#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a64be6d73e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q39b.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometColumnarExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + CometBroadcastHashJoin [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometProject [stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [i_item_sk] #3 + CometFilter [i_item_sk] + CometScan parquet spark_catalog.default.item [i_item_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [d_date_sk,d_moy] #5 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov] #6 + CometProject [mean,stdev] [w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometFilter [w_warehouse_sk,i_item_sk,d_moy,stdev,mean] + CometHashAggregate [w_warehouse_sk,i_item_sk,d_moy,stdev,mean,w_warehouse_name,n,avg,m2,sum,count,stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand)] + CometExchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + CometHashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + ReusedExchange [i_item_sk] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + CometBroadcastExchange [d_date_sk,d_moy] #8 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/explain.txt new file mode 100644 index 0000000000..96845f708b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/explain.txt @@ -0,0 +1,262 @@ +== Physical Plan == +* ColumnarToRow (50) ++- CometTakeOrderedAndProject (49) + +- CometProject (48) + +- CometBroadcastHashJoin (47) + :- CometProject (45) + : +- CometBroadcastHashJoin (44) + : :- CometProject (42) + : : +- CometBroadcastHashJoin (41) + : : :- CometProject (37) + : : : +- CometBroadcastHashJoin (36) + : : : :- CometBroadcastHashJoin (31) + : : : : :- CometFilter (16) + : : : : : +- CometHashAggregate (15) + : : : : : +- CometExchange (14) + : : : : : +- CometHashAggregate (13) + : : : : : +- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : : : +- CometBroadcastExchange (30) + : : : : +- CometHashAggregate (29) + : : : : +- CometExchange (28) + : : : : +- CometHashAggregate (27) + : : : : +- CometProject (26) + : : : : +- CometBroadcastHashJoin (25) + : : : : :- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometFilter (18) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (17) + : : : : : +- ReusedExchange (19) + : : : : +- CometBroadcastExchange (24) + : : : : +- CometFilter (23) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (22) + : : : +- CometBroadcastExchange (35) + : : : +- CometFilter (34) + : : : +- CometHashAggregate (33) + : : : +- ReusedExchange (32) + : : +- CometBroadcastExchange (40) + : : +- CometHashAggregate (39) + : : +- ReusedExchange (38) + : +- ReusedExchange (43) + +- ReusedExchange (46) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(4) CometFilter +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(9) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(11) CometBroadcastHashJoin +Left output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Right output [2]: [d_date_sk#15, d_year#16] +Arguments: [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(12) CometProject +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#15, d_year#16] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] + +(13) CometHashAggregate +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [partial_sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] + +(14) CometExchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#17, isEmpty#18] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#17, isEmpty#18] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] + +(16) CometFilter +Input [2]: [customer_id#19, year_total#20] +Condition : (isnotnull(year_total#20) AND (year_total#20 > 0.000000)) + +(17) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Arguments: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] + +(18) CometFilter +Input [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_customer_id#22)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] + +(20) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Right output [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Arguments: [c_customer_sk#21], [ss_customer_sk#29], Inner, BuildRight + +(21) CometProject +Input [14]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Arguments: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34], [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] + +(22) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#35, d_year#36] +Arguments: [d_date_sk#35, d_year#36] + +(23) CometFilter +Input [2]: [d_date_sk#35, d_year#36] +Condition : ((isnotnull(d_year#36) AND (d_year#36 = 2002)) AND isnotnull(d_date_sk#35)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#35, d_year#36] +Arguments: [d_date_sk#35, d_year#36] + +(25) CometBroadcastHashJoin +Left output [12]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Right output [2]: [d_date_sk#35, d_year#36] +Arguments: [ss_sold_date_sk#34], [d_date_sk#35], Inner, BuildRight + +(26) CometProject +Input [14]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34, d_date_sk#35, d_year#36] +Arguments: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#36], [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#36] + +(27) CometHashAggregate +Input [12]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#36] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36] +Functions [1]: [partial_sum(((((ss_ext_list_price#33 - ss_ext_wholesale_cost#32) - ss_ext_discount_amt#30) + ss_ext_sales_price#31) / 2))] + +(28) CometExchange +Input [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36, sum#37, isEmpty#38] +Arguments: hashpartitioning(c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36, sum#37, isEmpty#38] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36] +Functions [1]: [sum(((((ss_ext_list_price#33 - ss_ext_wholesale_cost#32) - ss_ext_discount_amt#30) + ss_ext_sales_price#31) / 2))] + +(30) CometBroadcastExchange +Input [8]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] +Arguments: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#19, year_total#20] +Right output [8]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] +Arguments: [customer_id#19], [customer_id#39], Inner, BuildRight + +(32) ReusedExchange [Reuses operator id: 14] +Output [10]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#54, sum#55, isEmpty#56] + +(33) CometHashAggregate +Input [10]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#54, sum#55, isEmpty#56] +Keys [8]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#54] +Functions [1]: [sum(((((cs_ext_list_price#57 - cs_ext_wholesale_cost#58) - cs_ext_discount_amt#59) + cs_ext_sales_price#60) / 2))] + +(34) CometFilter +Input [2]: [customer_id#61, year_total#62] +Condition : (isnotnull(year_total#62) AND (year_total#62 > 0.000000)) + +(35) CometBroadcastExchange +Input [2]: [customer_id#61, year_total#62] +Arguments: [customer_id#61, year_total#62] + +(36) CometBroadcastHashJoin +Left output [10]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] +Right output [2]: [customer_id#61, year_total#62] +Arguments: [customer_id#19], [customer_id#61], Inner, BuildRight + +(37) CometProject +Input [12]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, customer_id#61, year_total#62] +Arguments: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#62], [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#62] + +(38) ReusedExchange [Reuses operator id: 28] +Output [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#70, sum#71, isEmpty#72] + +(39) CometHashAggregate +Input [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#70, sum#71, isEmpty#72] +Keys [8]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#70] +Functions [1]: [sum(((((cs_ext_list_price#73 - cs_ext_wholesale_cost#74) - cs_ext_discount_amt#75) + cs_ext_sales_price#76) / 2))] + +(40) CometBroadcastExchange +Input [2]: [customer_id#77, year_total#78] +Arguments: [customer_id#77, year_total#78] + +(41) CometBroadcastHashJoin +Left output [11]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#62] +Right output [2]: [customer_id#77, year_total#78] +Arguments: [customer_id#19], [customer_id#77], Inner, (CASE WHEN (year_total#62 > 0.000000) THEN (year_total#78 / year_total#62) END > CASE WHEN (year_total#20 > 0.000000) THEN (year_total#46 / year_total#20) END), BuildRight + +(42) CometProject +Input [13]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#62, customer_id#77, year_total#78] +Arguments: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78], [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78] + +(43) ReusedExchange [Reuses operator id: 35] +Output [2]: [customer_id#79, year_total#80] + +(44) CometBroadcastHashJoin +Left output [10]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78] +Right output [2]: [customer_id#79, year_total#80] +Arguments: [customer_id#19], [customer_id#79], Inner, BuildRight + +(45) CometProject +Input [12]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78, customer_id#79, year_total#80] +Arguments: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78, year_total#80], [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78, year_total#80] + +(46) ReusedExchange [Reuses operator id: 40] +Output [2]: [customer_id#81, year_total#82] + +(47) CometBroadcastHashJoin +Left output [11]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78, year_total#80] +Right output [2]: [customer_id#81, year_total#82] +Arguments: [customer_id#19], [customer_id#81], Inner, (CASE WHEN (year_total#62 > 0.000000) THEN (year_total#78 / year_total#62) END > CASE WHEN (year_total#80 > 0.000000) THEN (year_total#82 / year_total#80) END), BuildRight + +(48) CometProject +Input [13]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#62, year_total#78, year_total#80, customer_id#81, year_total#82] +Arguments: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45], [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] + +(49) CometTakeOrderedAndProject +Input [7]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_id#39 ASC NULLS FIRST,customer_first_name#40 ASC NULLS FIRST,customer_last_name#41 ASC NULLS FIRST,customer_preferred_cust_flag#42 ASC NULLS FIRST,customer_birth_country#43 ASC NULLS FIRST,customer_login#44 ASC NULLS FIRST,customer_email_address#45 ASC NULLS FIRST], output=[customer_id#39,customer_first_name#40,customer_last_name#41,customer_preferred_cust_flag#42,customer_birth_country#43,customer_login#44,customer_email_address#45]), [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45], 100, [customer_id#39 ASC NULLS FIRST, customer_first_name#40 ASC NULLS FIRST, customer_last_name#41 ASC NULLS FIRST, customer_preferred_cust_flag#42 ASC NULLS FIRST, customer_birth_country#43 ASC NULLS FIRST, customer_login#44 ASC NULLS FIRST, customer_email_address#45 ASC NULLS FIRST], [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] + +(50) ColumnarToRow [codegen id : 1] +Input [7]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7754033520 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_datafusion/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + CometProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + CometBroadcastHashJoin [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total,customer_id,year_total] + CometProject [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total] + CometBroadcastHashJoin [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,customer_id,year_total] + CometProject [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2))] + ReusedExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] #1 + CometBroadcastExchange [customer_id,year_total] #8 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2))] + ReusedExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] #5 + ReusedExchange [customer_id,year_total] #7 + ReusedExchange [customer_id,year_total] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ed0d569680 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/explain.txt @@ -0,0 +1,535 @@ +== Physical Plan == +* ColumnarToRow (95) ++- CometTakeOrderedAndProject (94) + +- CometProject (93) + +- CometBroadcastHashJoin (92) + :- CometProject (79) + : +- CometBroadcastHashJoin (78) + : :- CometProject (62) + : : +- CometBroadcastHashJoin (61) + : : :- CometProject (48) + : : : +- CometBroadcastHashJoin (47) + : : : :- CometBroadcastHashJoin (31) + : : : : :- CometFilter (16) + : : : : : +- CometHashAggregate (15) + : : : : : +- CometExchange (14) + : : : : : +- CometHashAggregate (13) + : : : : : +- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : : : +- CometBroadcastExchange (30) + : : : : +- CometHashAggregate (29) + : : : : +- CometExchange (28) + : : : : +- CometHashAggregate (27) + : : : : +- CometProject (26) + : : : : +- CometBroadcastHashJoin (25) + : : : : :- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometFilter (18) + : : : : : : +- CometScan parquet spark_catalog.default.customer (17) + : : : : : +- ReusedExchange (19) + : : : : +- CometBroadcastExchange (24) + : : : : +- CometFilter (23) + : : : : +- CometScan parquet spark_catalog.default.date_dim (22) + : : : +- CometBroadcastExchange (46) + : : : +- CometFilter (45) + : : : +- CometHashAggregate (44) + : : : +- CometExchange (43) + : : : +- CometHashAggregate (42) + : : : +- CometProject (41) + : : : +- CometBroadcastHashJoin (40) + : : : :- CometProject (38) + : : : : +- CometBroadcastHashJoin (37) + : : : : :- CometFilter (33) + : : : : : +- CometScan parquet spark_catalog.default.customer (32) + : : : : +- CometBroadcastExchange (36) + : : : : +- CometFilter (35) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (34) + : : : +- ReusedExchange (39) + : : +- CometBroadcastExchange (60) + : : +- CometHashAggregate (59) + : : +- CometExchange (58) + : : +- CometHashAggregate (57) + : : +- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (53) + : : : +- CometBroadcastHashJoin (52) + : : : :- CometFilter (50) + : : : : +- CometScan parquet spark_catalog.default.customer (49) + : : : +- ReusedExchange (51) + : : +- ReusedExchange (54) + : +- CometBroadcastExchange (77) + : +- CometFilter (76) + : +- CometHashAggregate (75) + : +- CometExchange (74) + : +- CometHashAggregate (73) + : +- CometProject (72) + : +- CometBroadcastHashJoin (71) + : :- CometProject (69) + : : +- CometBroadcastHashJoin (68) + : : :- CometFilter (64) + : : : +- CometScan parquet spark_catalog.default.customer (63) + : : +- CometBroadcastExchange (67) + : : +- CometFilter (66) + : : +- CometScan parquet spark_catalog.default.web_sales (65) + : +- ReusedExchange (70) + +- CometBroadcastExchange (91) + +- CometHashAggregate (90) + +- CometExchange (89) + +- CometHashAggregate (88) + +- CometProject (87) + +- CometBroadcastHashJoin (86) + :- CometProject (84) + : +- CometBroadcastHashJoin (83) + : :- CometFilter (81) + : : +- CometScan parquet spark_catalog.default.customer (80) + : +- ReusedExchange (82) + +- ReusedExchange (85) + + +(1) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#14)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(11) CometBroadcastHashJoin +Left output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Right output [2]: [d_date_sk#15, d_year#16] +Arguments: [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(12) CometProject +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#15, d_year#16] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] + +(13) CometHashAggregate +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [partial_sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] + +(14) CometExchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#17, isEmpty#18] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#17, isEmpty#18] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] + +(16) CometFilter +Input [2]: [customer_id#19, year_total#20] +Condition : (isnotnull(year_total#20) AND (year_total#20 > 0.000000)) + +(17) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(18) CometFilter +Input [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_customer_id#22)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] + +(20) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Right output [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Arguments: [c_customer_sk#21], [ss_customer_sk#29], Inner, BuildRight + +(21) CometProject +Input [14]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Arguments: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34], [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] + +(22) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#35, d_year#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) CometFilter +Input [2]: [d_date_sk#35, d_year#36] +Condition : ((isnotnull(d_year#36) AND (d_year#36 = 2002)) AND isnotnull(d_date_sk#35)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#35, d_year#36] +Arguments: [d_date_sk#35, d_year#36] + +(25) CometBroadcastHashJoin +Left output [12]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Right output [2]: [d_date_sk#35, d_year#36] +Arguments: [ss_sold_date_sk#34], [d_date_sk#35], Inner, BuildRight + +(26) CometProject +Input [14]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34, d_date_sk#35, d_year#36] +Arguments: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#36], [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#36] + +(27) CometHashAggregate +Input [12]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#36] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36] +Functions [1]: [partial_sum(((((ss_ext_list_price#33 - ss_ext_wholesale_cost#32) - ss_ext_discount_amt#30) + ss_ext_sales_price#31) / 2))] + +(28) CometExchange +Input [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36, sum#37, isEmpty#38] +Arguments: hashpartitioning(c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36, sum#37, isEmpty#38] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, d_year#36] +Functions [1]: [sum(((((ss_ext_list_price#33 - ss_ext_wholesale_cost#32) - ss_ext_discount_amt#30) + ss_ext_sales_price#31) / 2))] + +(30) CometBroadcastExchange +Input [8]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] +Arguments: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#19, year_total#20] +Right output [8]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] +Arguments: [customer_id#19], [customer_id#39], Inner, BuildRight + +(32) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(33) CometFilter +Input [8]: [c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54] +Condition : (isnotnull(c_customer_sk#47) AND isnotnull(c_customer_id#48)) + +(34) CometScan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_bill_customer_sk#55, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#60)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(35) CometFilter +Input [6]: [cs_bill_customer_sk#55, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] +Condition : isnotnull(cs_bill_customer_sk#55) + +(36) CometBroadcastExchange +Input [6]: [cs_bill_customer_sk#55, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] +Arguments: [cs_bill_customer_sk#55, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] + +(37) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54] +Right output [6]: [cs_bill_customer_sk#55, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] +Arguments: [c_customer_sk#47], [cs_bill_customer_sk#55], Inner, BuildRight + +(38) CometProject +Input [14]: [c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_bill_customer_sk#55, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] +Arguments: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60], [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] + +(39) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#61, d_year#62] + +(40) CometBroadcastHashJoin +Left output [12]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60] +Right output [2]: [d_date_sk#61, d_year#62] +Arguments: [cs_sold_date_sk#60], [d_date_sk#61], Inner, BuildRight + +(41) CometProject +Input [14]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, cs_sold_date_sk#60, d_date_sk#61, d_year#62] +Arguments: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, d_year#62], [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, d_year#62] + +(42) CometHashAggregate +Input [12]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, cs_ext_discount_amt#56, cs_ext_sales_price#57, cs_ext_wholesale_cost#58, cs_ext_list_price#59, d_year#62] +Keys [8]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#62] +Functions [1]: [partial_sum(((((cs_ext_list_price#59 - cs_ext_wholesale_cost#58) - cs_ext_discount_amt#56) + cs_ext_sales_price#57) / 2))] + +(43) CometExchange +Input [10]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#62, sum#63, isEmpty#64] +Arguments: hashpartitioning(c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#62, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [10]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#62, sum#63, isEmpty#64] +Keys [8]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#62] +Functions [1]: [sum(((((cs_ext_list_price#59 - cs_ext_wholesale_cost#58) - cs_ext_discount_amt#56) + cs_ext_sales_price#57) / 2))] + +(45) CometFilter +Input [2]: [customer_id#65, year_total#66] +Condition : (isnotnull(year_total#66) AND (year_total#66 > 0.000000)) + +(46) CometBroadcastExchange +Input [2]: [customer_id#65, year_total#66] +Arguments: [customer_id#65, year_total#66] + +(47) CometBroadcastHashJoin +Left output [10]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46] +Right output [2]: [customer_id#65, year_total#66] +Arguments: [customer_id#19], [customer_id#65], Inner, BuildRight + +(48) CometProject +Input [12]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, customer_id#65, year_total#66] +Arguments: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#66], [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#66] + +(49) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#67, c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(50) CometFilter +Input [8]: [c_customer_sk#67, c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74] +Condition : (isnotnull(c_customer_sk#67) AND isnotnull(c_customer_id#68)) + +(51) ReusedExchange [Reuses operator id: 36] +Output [6]: [cs_bill_customer_sk#75, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80] + +(52) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#67, c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74] +Right output [6]: [cs_bill_customer_sk#75, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80] +Arguments: [c_customer_sk#67], [cs_bill_customer_sk#75], Inner, BuildRight + +(53) CometProject +Input [14]: [c_customer_sk#67, c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_bill_customer_sk#75, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80] +Arguments: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80], [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80] + +(54) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#81, d_year#82] + +(55) CometBroadcastHashJoin +Left output [12]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80] +Right output [2]: [d_date_sk#81, d_year#82] +Arguments: [cs_sold_date_sk#80], [d_date_sk#81], Inner, BuildRight + +(56) CometProject +Input [14]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, cs_sold_date_sk#80, d_date_sk#81, d_year#82] +Arguments: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, d_year#82], [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, d_year#82] + +(57) CometHashAggregate +Input [12]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, cs_ext_discount_amt#76, cs_ext_sales_price#77, cs_ext_wholesale_cost#78, cs_ext_list_price#79, d_year#82] +Keys [8]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, d_year#82] +Functions [1]: [partial_sum(((((cs_ext_list_price#79 - cs_ext_wholesale_cost#78) - cs_ext_discount_amt#76) + cs_ext_sales_price#77) / 2))] + +(58) CometExchange +Input [10]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, d_year#82, sum#83, isEmpty#84] +Arguments: hashpartitioning(c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, d_year#82, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [10]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, d_year#82, sum#83, isEmpty#84] +Keys [8]: [c_customer_id#68, c_first_name#69, c_last_name#70, c_preferred_cust_flag#71, c_birth_country#72, c_login#73, c_email_address#74, d_year#82] +Functions [1]: [sum(((((cs_ext_list_price#79 - cs_ext_wholesale_cost#78) - cs_ext_discount_amt#76) + cs_ext_sales_price#77) / 2))] + +(60) CometBroadcastExchange +Input [2]: [customer_id#85, year_total#86] +Arguments: [customer_id#85, year_total#86] + +(61) CometBroadcastHashJoin +Left output [11]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#66] +Right output [2]: [customer_id#85, year_total#86] +Arguments: [customer_id#19], [customer_id#85], Inner, (CASE WHEN (year_total#66 > 0.000000) THEN (year_total#86 / year_total#66) END > CASE WHEN (year_total#20 > 0.000000) THEN (year_total#46 / year_total#20) END), BuildRight + +(62) CometProject +Input [13]: [customer_id#19, year_total#20, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#46, year_total#66, customer_id#85, year_total#86] +Arguments: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86], [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86] + +(63) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(64) CometFilter +Input [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94] +Condition : (isnotnull(c_customer_sk#87) AND isnotnull(c_customer_id#88)) + +(65) CometScan parquet spark_catalog.default.web_sales +Output [6]: [ws_bill_customer_sk#95, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#100)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(66) CometFilter +Input [6]: [ws_bill_customer_sk#95, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] +Condition : isnotnull(ws_bill_customer_sk#95) + +(67) CometBroadcastExchange +Input [6]: [ws_bill_customer_sk#95, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] +Arguments: [ws_bill_customer_sk#95, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] + +(68) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94] +Right output [6]: [ws_bill_customer_sk#95, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] +Arguments: [c_customer_sk#87], [ws_bill_customer_sk#95], Inner, BuildRight + +(69) CometProject +Input [14]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_bill_customer_sk#95, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] +Arguments: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100], [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] + +(70) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#101, d_year#102] + +(71) CometBroadcastHashJoin +Left output [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100] +Right output [2]: [d_date_sk#101, d_year#102] +Arguments: [ws_sold_date_sk#100], [d_date_sk#101], Inner, BuildRight + +(72) CometProject +Input [14]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, ws_sold_date_sk#100, d_date_sk#101, d_year#102] +Arguments: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, d_year#102], [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, d_year#102] + +(73) CometHashAggregate +Input [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, ws_ext_discount_amt#96, ws_ext_sales_price#97, ws_ext_wholesale_cost#98, ws_ext_list_price#99, d_year#102] +Keys [8]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, d_year#102] +Functions [1]: [partial_sum(((((ws_ext_list_price#99 - ws_ext_wholesale_cost#98) - ws_ext_discount_amt#96) + ws_ext_sales_price#97) / 2))] + +(74) CometExchange +Input [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, d_year#102, sum#103, isEmpty#104] +Arguments: hashpartitioning(c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, d_year#102, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(75) CometHashAggregate +Input [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, d_year#102, sum#103, isEmpty#104] +Keys [8]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#94, d_year#102] +Functions [1]: [sum(((((ws_ext_list_price#99 - ws_ext_wholesale_cost#98) - ws_ext_discount_amt#96) + ws_ext_sales_price#97) / 2))] + +(76) CometFilter +Input [2]: [customer_id#105, year_total#106] +Condition : (isnotnull(year_total#106) AND (year_total#106 > 0.000000)) + +(77) CometBroadcastExchange +Input [2]: [customer_id#105, year_total#106] +Arguments: [customer_id#105, year_total#106] + +(78) CometBroadcastHashJoin +Left output [10]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86] +Right output [2]: [customer_id#105, year_total#106] +Arguments: [customer_id#19], [customer_id#105], Inner, BuildRight + +(79) CometProject +Input [12]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86, customer_id#105, year_total#106] +Arguments: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86, year_total#106], [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86, year_total#106] + +(80) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(81) CometFilter +Input [8]: [c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114] +Condition : (isnotnull(c_customer_sk#107) AND isnotnull(c_customer_id#108)) + +(82) ReusedExchange [Reuses operator id: 67] +Output [6]: [ws_bill_customer_sk#115, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120] + +(83) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114] +Right output [6]: [ws_bill_customer_sk#115, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120] +Arguments: [c_customer_sk#107], [ws_bill_customer_sk#115], Inner, BuildRight + +(84) CometProject +Input [14]: [c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_bill_customer_sk#115, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120] +Arguments: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120], [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120] + +(85) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#121, d_year#122] + +(86) CometBroadcastHashJoin +Left output [12]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120] +Right output [2]: [d_date_sk#121, d_year#122] +Arguments: [ws_sold_date_sk#120], [d_date_sk#121], Inner, BuildRight + +(87) CometProject +Input [14]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, ws_sold_date_sk#120, d_date_sk#121, d_year#122] +Arguments: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, d_year#122], [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, d_year#122] + +(88) CometHashAggregate +Input [12]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#116, ws_ext_sales_price#117, ws_ext_wholesale_cost#118, ws_ext_list_price#119, d_year#122] +Keys [8]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#122] +Functions [1]: [partial_sum(((((ws_ext_list_price#119 - ws_ext_wholesale_cost#118) - ws_ext_discount_amt#116) + ws_ext_sales_price#117) / 2))] + +(89) CometExchange +Input [10]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#122, sum#123, isEmpty#124] +Arguments: hashpartitioning(c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#122, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(90) CometHashAggregate +Input [10]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#122, sum#123, isEmpty#124] +Keys [8]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#122] +Functions [1]: [sum(((((ws_ext_list_price#119 - ws_ext_wholesale_cost#118) - ws_ext_discount_amt#116) + ws_ext_sales_price#117) / 2))] + +(91) CometBroadcastExchange +Input [2]: [customer_id#125, year_total#126] +Arguments: [customer_id#125, year_total#126] + +(92) CometBroadcastHashJoin +Left output [11]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86, year_total#106] +Right output [2]: [customer_id#125, year_total#126] +Arguments: [customer_id#19], [customer_id#125], Inner, (CASE WHEN (year_total#66 > 0.000000) THEN (year_total#86 / year_total#66) END > CASE WHEN (year_total#106 > 0.000000) THEN (year_total#126 / year_total#106) END), BuildRight + +(93) CometProject +Input [13]: [customer_id#19, customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45, year_total#66, year_total#86, year_total#106, customer_id#125, year_total#126] +Arguments: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45], [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] + +(94) CometTakeOrderedAndProject +Input [7]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_id#39 ASC NULLS FIRST,customer_first_name#40 ASC NULLS FIRST,customer_last_name#41 ASC NULLS FIRST,customer_preferred_cust_flag#42 ASC NULLS FIRST,customer_birth_country#43 ASC NULLS FIRST,customer_login#44 ASC NULLS FIRST,customer_email_address#45 ASC NULLS FIRST], output=[customer_id#39,customer_first_name#40,customer_last_name#41,customer_preferred_cust_flag#42,customer_birth_country#43,customer_login#44,customer_email_address#45]), [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45], 100, [customer_id#39 ASC NULLS FIRST, customer_first_name#40 ASC NULLS FIRST, customer_last_name#41 ASC NULLS FIRST, customer_preferred_cust_flag#42 ASC NULLS FIRST, customer_birth_country#43 ASC NULLS FIRST, customer_login#44 ASC NULLS FIRST, customer_email_address#45 ASC NULLS FIRST], [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] + +(95) ColumnarToRow [codegen id : 1] +Input [7]: [customer_id#39, customer_first_name#40, customer_last_name#41, customer_preferred_cust_flag#42, customer_birth_country#43, customer_login#44, customer_email_address#45] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4406ff5340 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q4.native_iceberg_compat/simplified.txt @@ -0,0 +1,97 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + CometProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + CometBroadcastHashJoin [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total,customer_id,year_total] + CometProject [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total] + CometBroadcastHashJoin [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,customer_id,year_total] + CometProject [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] #9 + CometFilter [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #10 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] #9 + ReusedExchange [d_date_sk,d_year] #6 + CometBroadcastExchange [customer_id,year_total] #12 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #13 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] #14 + CometFilter [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #15 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,sum(((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #16 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] #14 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/explain.txt new file mode 100644 index 0000000000..d78d968483 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometTakeOrderedAndProject (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (16) + : : +- CometBroadcastHashJoin (15) + : : :- CometProject (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (5) + : : +- CometBroadcastExchange (14) + : : +- CometFilter (13) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (12) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + +- CometBroadcastExchange (25) + +- CometFilter (24) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (23) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) + +(3) CometExchange +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5], [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] + +(6) CometFilter +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(7) CometProject +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8], [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(8) CometExchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: hashpartitioning(cr_order_number#7, cr_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8], [cr_order_number#7 ASC NULLS FIRST, cr_item_sk#6 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Right output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cs_order_number#3, cs_item_sk#2], [cr_order_number#7, cr_item_sk#6], LeftOuter + +(11) CometProject +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8], [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] + +(12) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [w_warehouse_sk#10, w_state#11] + +(13) CometFilter +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(14) CometBroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [w_warehouse_sk#10, w_state#11] + +(15) CometBroadcastHashJoin +Left output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] +Right output [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [cs_warehouse_sk#1], [w_warehouse_sk#10], Inner, BuildRight + +(16) CometProject +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] +Arguments: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11], [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Arguments: [i_item_sk#12, i_item_id#13, i_current_price#14] + +(18) CometFilter +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Condition : (((isnotnull(i_current_price#14) AND (i_current_price#14 >= 0.99)) AND (i_current_price#14 <= 1.49)) AND isnotnull(i_item_sk#12)) + +(19) CometProject +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Arguments: [i_item_sk#12, i_item_id#13], [i_item_sk#12, i_item_id#13] + +(20) CometBroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: [i_item_sk#12, i_item_id#13] + +(21) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] +Right output [2]: [i_item_sk#12, i_item_id#13] +Arguments: [cs_item_sk#2], [i_item_sk#12], Inner, BuildRight + +(22) CometProject +Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_sk#12, i_item_id#13] +Arguments: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13], [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] + +(23) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_date#16] +Arguments: [d_date_sk#15, d_date#16] + +(24) CometFilter +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-02-10)) AND (d_date#16 <= 2000-04-10)) AND isnotnull(d_date_sk#15)) + +(25) CometBroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: [d_date_sk#15, d_date#16] + +(26) CometBroadcastHashJoin +Left output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] +Right output [2]: [d_date_sk#15, d_date#16] +Arguments: [cs_sold_date_sk#5], [d_date_sk#15], Inner, BuildRight + +(27) CometProject +Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date_sk#15, d_date#16] +Arguments: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16], [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] + +(28) CometHashAggregate +Input [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [partial_sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] + +(29) CometExchange +Input [6]: [w_state#11, i_item_id#13, sum#17, isEmpty#18, sum#19, isEmpty#20] +Arguments: hashpartitioning(w_state#11, i_item_id#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(30) CometHashAggregate +Input [6]: [w_state#11, i_item_id#13, sum#17, isEmpty#18, sum#19, isEmpty#20] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] + +(31) CometTakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#13, sales_before#21, sales_after#22] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[w_state#11 ASC NULLS FIRST,i_item_id#13 ASC NULLS FIRST], output=[w_state#11,i_item_id#13,sales_before#21,sales_after#22]), [w_state#11, i_item_id#13, sales_before#21, sales_after#22], 100, [w_state#11 ASC NULLS FIRST, i_item_id#13 ASC NULLS FIRST], [w_state#11, i_item_id#13, sales_before#21, sales_after#22] + +(32) ColumnarToRow [codegen id : 1] +Input [4]: [w_state#11, i_item_id#13, sales_before#21, sales_after#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e2f68d5b37 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + CometHashAggregate [w_state,i_item_id,sales_before,sales_after,sum,isEmpty,sum,isEmpty,sum(CASE WHEN (d_date < 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END)] + CometExchange [w_state,i_item_id] #1 + CometHashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty,d_date,cs_sales_price,cr_refunded_cash] + CometProject [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id,d_date_sk,d_date] + CometProject [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state] + CometBroadcastHashJoin [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_warehouse_sk,w_state] + CometProject [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash] + CometSortMergeJoin [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_refunded_cash] + CometSort [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #2 + CometFilter [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash] + CometExchange [cr_order_number,cr_item_sk] #3 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_state] #4 + CometFilter [w_warehouse_sk,w_state] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometProject [i_item_sk,i_item_id] + CometFilter [i_item_sk,i_item_id,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_current_price] + CometBroadcastExchange [d_date_sk,d_date] #6 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7cde0d368f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometTakeOrderedAndProject (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (16) + : : +- CometBroadcastHashJoin (15) + : : :- CometProject (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometScan parquet spark_catalog.default.catalog_returns (5) + : : +- CometBroadcastExchange (14) + : : +- CometFilter (13) + : : +- CometScan parquet spark_catalog.default.warehouse (12) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometScan parquet spark_catalog.default.item (17) + +- CometBroadcastExchange (25) + +- CometFilter (24) + +- CometScan parquet spark_catalog.default.date_dim (23) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) + +(3) CometExchange +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5], [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST] + +(5) CometScan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(7) CometProject +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8], [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(8) CometExchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: hashpartitioning(cr_order_number#7, cr_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8], [cr_order_number#7 ASC NULLS FIRST, cr_item_sk#6 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Right output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cs_order_number#3, cs_item_sk#2], [cr_order_number#7, cr_item_sk#6], LeftOuter + +(11) CometProject +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8], [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] + +(12) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#10, w_state#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(13) CometFilter +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(14) CometBroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [w_warehouse_sk#10, w_state#11] + +(15) CometBroadcastHashJoin +Left output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] +Right output [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [cs_warehouse_sk#1], [w_warehouse_sk#10], Inner, BuildRight + +(16) CometProject +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] +Arguments: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11], [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] + +(17) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) CometFilter +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Condition : (((isnotnull(i_current_price#14) AND (i_current_price#14 >= 0.99)) AND (i_current_price#14 <= 1.49)) AND isnotnull(i_item_sk#12)) + +(19) CometProject +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Arguments: [i_item_sk#12, i_item_id#13], [i_item_sk#12, i_item_id#13] + +(20) CometBroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: [i_item_sk#12, i_item_id#13] + +(21) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] +Right output [2]: [i_item_sk#12, i_item_id#13] +Arguments: [cs_item_sk#2], [i_item_sk#12], Inner, BuildRight + +(22) CometProject +Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_sk#12, i_item_id#13] +Arguments: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13], [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] + +(23) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) CometFilter +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-02-10)) AND (d_date#16 <= 2000-04-10)) AND isnotnull(d_date_sk#15)) + +(25) CometBroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: [d_date_sk#15, d_date#16] + +(26) CometBroadcastHashJoin +Left output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] +Right output [2]: [d_date_sk#15, d_date#16] +Arguments: [cs_sold_date_sk#5], [d_date_sk#15], Inner, BuildRight + +(27) CometProject +Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date_sk#15, d_date#16] +Arguments: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16], [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] + +(28) CometHashAggregate +Input [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [partial_sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] + +(29) CometExchange +Input [6]: [w_state#11, i_item_id#13, sum#17, isEmpty#18, sum#19, isEmpty#20] +Arguments: hashpartitioning(w_state#11, i_item_id#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(30) CometHashAggregate +Input [6]: [w_state#11, i_item_id#13, sum#17, isEmpty#18, sum#19, isEmpty#20] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] + +(31) CometTakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#13, sales_before#21, sales_after#22] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[w_state#11 ASC NULLS FIRST,i_item_id#13 ASC NULLS FIRST], output=[w_state#11,i_item_id#13,sales_before#21,sales_after#22]), [w_state#11, i_item_id#13, sales_before#21, sales_after#22], 100, [w_state#11 ASC NULLS FIRST, i_item_id#13 ASC NULLS FIRST], [w_state#11, i_item_id#13, sales_before#21, sales_after#22] + +(32) ColumnarToRow [codegen id : 1] +Input [4]: [w_state#11, i_item_id#13, sales_before#21, sales_after#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c904b45d49 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q40.native_iceberg_compat/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + CometHashAggregate [w_state,i_item_id,sales_before,sales_after,sum,isEmpty,sum,isEmpty,sum(CASE WHEN (d_date < 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END)] + CometExchange [w_state,i_item_id] #1 + CometHashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty,d_date,cs_sales_price,cr_refunded_cash] + CometProject [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id,d_date_sk,d_date] + CometProject [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state] + CometBroadcastHashJoin [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_warehouse_sk,w_state] + CometProject [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash] + CometSortMergeJoin [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_refunded_cash] + CometSort [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #2 + CometFilter [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash] + CometExchange [cr_order_number,cr_item_sk] #3 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_state] #4 + CometFilter [w_warehouse_sk,w_state] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometProject [i_item_sk,i_item_id] + CometFilter [i_item_sk,i_item_id,i_current_price] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_current_price] + CometBroadcastExchange [d_date_sk,d_date] #6 + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/explain.txt new file mode 100644 index 0000000000..e1a62cf025 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/explain.txt @@ -0,0 +1,102 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (3) + : +- CometFilter (2) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometHashAggregate (9) + +- CometExchange (8) + +- CometHashAggregate (7) + +- CometProject (6) + +- CometFilter (5) + +- CometNativeScan: `spark_catalog`.`default`.`item` (4) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Arguments: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(2) CometFilter +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(3) CometProject +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Arguments: [i_manufact#2, i_product_name#3], [i_manufact#2, i_product_name#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Arguments: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] + +(5) CometFilter +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Condition : (((((i_category#4 = Women ) AND (((((i_color#7 = powder ) OR (i_color#7 = khaki )) AND ((i_units#8 = Ounce ) OR (i_units#8 = Oz ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = brown ) OR (i_color#7 = honeydew )) AND ((i_units#8 = Bunch ) OR (i_units#8 = Ton ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = floral ) OR (i_color#7 = deep )) AND ((i_units#8 = N/A ) OR (i_units#8 = Dozen ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = light ) OR (i_color#7 = cornflower )) AND ((i_units#8 = Box ) OR (i_units#8 = Pound ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large )))))) OR (((i_category#4 = Women ) AND (((((i_color#7 = midnight ) OR (i_color#7 = snow )) AND ((i_units#8 = Pallet ) OR (i_units#8 = Gross ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = cyan ) OR (i_color#7 = papaya )) AND ((i_units#8 = Cup ) OR (i_units#8 = Dram ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = orange ) OR (i_color#7 = frosted )) AND ((i_units#8 = Each ) OR (i_units#8 = Tbl ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = forest ) OR (i_color#7 = ghost )) AND ((i_units#8 = Lb ) OR (i_units#8 = Bundle ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))))))) AND isnotnull(i_manufact#5)) + +(6) CometProject +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Arguments: [i_manufact#5], [i_manufact#5] + +(7) CometHashAggregate +Input [1]: [i_manufact#5] +Keys [1]: [i_manufact#5] +Functions [1]: [partial_count(1)] + +(8) CometExchange +Input [2]: [i_manufact#5, count#9] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(9) CometHashAggregate +Input [2]: [i_manufact#5, count#9] +Keys [1]: [i_manufact#5] +Functions [1]: [count(1)] + +(10) CometFilter +Input [2]: [item_cnt#10, i_manufact#5] +Condition : (item_cnt#10 > 0) + +(11) CometProject +Input [2]: [item_cnt#10, i_manufact#5] +Arguments: [i_manufact#5], [i_manufact#5] + +(12) CometBroadcastExchange +Input [1]: [i_manufact#5] +Arguments: [i_manufact#5] + +(13) CometBroadcastHashJoin +Left output [2]: [i_manufact#2, i_product_name#3] +Right output [1]: [i_manufact#5] +Arguments: [i_manufact#2], [i_manufact#5], Inner, BuildRight + +(14) CometProject +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#5] +Arguments: [i_product_name#3], [i_product_name#3] + +(15) CometHashAggregate +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] + +(16) CometExchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(17) CometHashAggregate +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] + +(18) CometTakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_product_name#3 ASC NULLS FIRST], output=[i_product_name#3]), [i_product_name#3], 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + +(19) ColumnarToRow [codegen id : 1] +Input [1]: [i_product_name#3] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ea03f38e2c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name] + CometHashAggregate [i_product_name] + CometExchange [i_product_name] #1 + CometHashAggregate [i_product_name] + CometProject [i_product_name] + CometBroadcastHashJoin [i_manufact,i_product_name,i_manufact] + CometProject [i_manufact,i_product_name] + CometFilter [i_manufact_id,i_manufact,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_manufact_id,i_manufact,i_product_name] + CometBroadcastExchange [i_manufact] #2 + CometProject [i_manufact] + CometFilter [item_cnt,i_manufact] + CometHashAggregate [item_cnt,i_manufact,count,count(1)] + CometExchange [i_manufact] #3 + CometHashAggregate [i_manufact,count] + CometProject [i_manufact] + CometFilter [i_category,i_manufact,i_size,i_color,i_units] + CometNativeScan: `spark_catalog`.`default`.`item` [i_category,i_manufact,i_size,i_color,i_units] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..48a1c60d16 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/explain.txt @@ -0,0 +1,108 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (3) + : +- CometFilter (2) + : +- CometScan parquet spark_catalog.default.item (1) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometHashAggregate (9) + +- CometExchange (8) + +- CometHashAggregate (7) + +- CometProject (6) + +- CometFilter (5) + +- CometScan parquet spark_catalog.default.item (4) + + +(1) CometScan parquet spark_catalog.default.item +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(3) CometProject +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Arguments: [i_manufact#2, i_product_name#3], [i_manufact#2, i_product_name#3] + +(4) CometScan parquet spark_catalog.default.item +Output [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(Or(And(EqualTo(i_category,Women ),Or(And(And(Or(EqualTo(i_color,powder ),EqualTo(i_color,khaki )),Or(EqualTo(i_units,Ounce ),EqualTo(i_units,Oz ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))),And(And(Or(EqualTo(i_color,brown ),EqualTo(i_color,honeydew )),Or(EqualTo(i_units,Bunch ),EqualTo(i_units,Ton ))),Or(EqualTo(i_size,N/A ),EqualTo(i_size,small ))))),And(EqualTo(i_category,Men ),Or(And(And(Or(EqualTo(i_color,floral ),EqualTo(i_color,deep )),Or(EqualTo(i_units,N/A ),EqualTo(i_units,Dozen ))),Or(EqualTo(i_size,petite ),EqualTo(i_size,large ))),And(And(Or(EqualTo(i_color,light ),EqualTo(i_color,cornflower )),Or(EqualTo(i_units,Box ),EqualTo(i_units,Pound ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large )))))),Or(And(EqualTo(i_category,Women ),Or(And(And(Or(EqualTo(i_color,midnight ),EqualTo(i_color,snow )),Or(EqualTo(i_units,Pallet ),EqualTo(i_units,Gross ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))),And(And(Or(EqualTo(i_color,cyan ),EqualTo(i_color,papaya )),Or(EqualTo(i_units,Cup ),EqualTo(i_units,Dram ))),Or(EqualTo(i_size,N/A ),EqualTo(i_size,small ))))),And(EqualTo(i_category,Men ),Or(And(And(Or(EqualTo(i_color,orange ),EqualTo(i_color,frosted )),Or(EqualTo(i_units,Each ),EqualTo(i_units,Tbl ))),Or(EqualTo(i_size,petite ),EqualTo(i_size,large ))),And(And(Or(EqualTo(i_color,forest ),EqualTo(i_color,ghost )),Or(EqualTo(i_units,Lb ),EqualTo(i_units,Bundle ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))))))), IsNotNull(i_manufact)] +ReadSchema: struct + +(5) CometFilter +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Condition : (((((i_category#4 = Women ) AND (((((i_color#7 = powder ) OR (i_color#7 = khaki )) AND ((i_units#8 = Ounce ) OR (i_units#8 = Oz ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = brown ) OR (i_color#7 = honeydew )) AND ((i_units#8 = Bunch ) OR (i_units#8 = Ton ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = floral ) OR (i_color#7 = deep )) AND ((i_units#8 = N/A ) OR (i_units#8 = Dozen ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = light ) OR (i_color#7 = cornflower )) AND ((i_units#8 = Box ) OR (i_units#8 = Pound ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large )))))) OR (((i_category#4 = Women ) AND (((((i_color#7 = midnight ) OR (i_color#7 = snow )) AND ((i_units#8 = Pallet ) OR (i_units#8 = Gross ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = cyan ) OR (i_color#7 = papaya )) AND ((i_units#8 = Cup ) OR (i_units#8 = Dram ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = orange ) OR (i_color#7 = frosted )) AND ((i_units#8 = Each ) OR (i_units#8 = Tbl ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = forest ) OR (i_color#7 = ghost )) AND ((i_units#8 = Lb ) OR (i_units#8 = Bundle ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))))))) AND isnotnull(i_manufact#5)) + +(6) CometProject +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Arguments: [i_manufact#5], [i_manufact#5] + +(7) CometHashAggregate +Input [1]: [i_manufact#5] +Keys [1]: [i_manufact#5] +Functions [1]: [partial_count(1)] + +(8) CometExchange +Input [2]: [i_manufact#5, count#9] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(9) CometHashAggregate +Input [2]: [i_manufact#5, count#9] +Keys [1]: [i_manufact#5] +Functions [1]: [count(1)] + +(10) CometFilter +Input [2]: [item_cnt#10, i_manufact#5] +Condition : (item_cnt#10 > 0) + +(11) CometProject +Input [2]: [item_cnt#10, i_manufact#5] +Arguments: [i_manufact#5], [i_manufact#5] + +(12) CometBroadcastExchange +Input [1]: [i_manufact#5] +Arguments: [i_manufact#5] + +(13) CometBroadcastHashJoin +Left output [2]: [i_manufact#2, i_product_name#3] +Right output [1]: [i_manufact#5] +Arguments: [i_manufact#2], [i_manufact#5], Inner, BuildRight + +(14) CometProject +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#5] +Arguments: [i_product_name#3], [i_product_name#3] + +(15) CometHashAggregate +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] + +(16) CometExchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(17) CometHashAggregate +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] + +(18) CometTakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_product_name#3 ASC NULLS FIRST], output=[i_product_name#3]), [i_product_name#3], 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + +(19) ColumnarToRow [codegen id : 1] +Input [1]: [i_product_name#3] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..007103499b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q41.native_iceberg_compat/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name] + CometHashAggregate [i_product_name] + CometExchange [i_product_name] #1 + CometHashAggregate [i_product_name] + CometProject [i_product_name] + CometBroadcastHashJoin [i_manufact,i_product_name,i_manufact] + CometProject [i_manufact,i_product_name] + CometFilter [i_manufact_id,i_manufact,i_product_name] + CometScan parquet spark_catalog.default.item [i_manufact_id,i_manufact,i_product_name] + CometBroadcastExchange [i_manufact] #2 + CometProject [i_manufact] + CometFilter [item_cnt,i_manufact] + CometHashAggregate [item_cnt,i_manufact,count,count(1)] + CometExchange [i_manufact] #3 + CometHashAggregate [i_manufact,count] + CometProject [i_manufact] + CometFilter [i_category,i_manufact,i_size,i_color,i_units] + CometScan parquet spark_catalog.default.item [i_category,i_manufact,i_size,i_color,i_units] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/explain.txt new file mode 100644 index 0000000000..ed78e7e69c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9], [i_item_sk#7, i_category_id#8, i_category#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9], [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] + +(15) CometHashAggregate +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#11] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#11] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#12 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#8 ASC NULLS FIRST,i_category#9 ASC NULLS FIRST], output=[d_year#2,i_category_id#8,i_category#9,sum(ss_ext_sales_price)#12]), [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12], 100, [sum(ss_ext_sales_price)#12 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/simplified.txt new file mode 100644 index 0000000000..12bf8b222e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [d_year,i_category_id,i_category,sum(ss_ext_sales_price)] + CometHashAggregate [d_year,i_category_id,i_category,sum(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [d_year,i_category_id,i_category] #1 + CometHashAggregate [d_year,i_category_id,i_category,sum,ss_ext_sales_price] + CometProject [d_year,ss_ext_sales_price,i_category_id,i_category] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_category_id,i_category] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_category_id,i_category] #3 + CometProject [i_item_sk,i_category_id,i_category] + CometFilter [i_item_sk,i_category_id,i_category,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..27f73d9e42 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/explain.txt @@ -0,0 +1,111 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.date_dim (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometScan parquet spark_catalog.default.store_sales (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometScan parquet spark_catalog.default.item (9) + + +(1) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9], [i_item_sk#7, i_category_id#8, i_category#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9], [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] + +(15) CometHashAggregate +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#11] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#11] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#12 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#8 ASC NULLS FIRST,i_category#9 ASC NULLS FIRST], output=[d_year#2,i_category_id#8,i_category#9,sum(ss_ext_sales_price)#12]), [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12], 100, [sum(ss_ext_sales_price)#12 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#12] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..40528f52d3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q42.native_iceberg_compat/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [d_year,i_category_id,i_category,sum(ss_ext_sales_price)] + CometHashAggregate [d_year,i_category_id,i_category,sum(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [d_year,i_category_id,i_category] #1 + CometHashAggregate [d_year,i_category_id,i_category,sum,ss_ext_sales_price] + CometProject [d_year,ss_ext_sales_price,i_category_id,i_category] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_category_id,i_category] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_category_id,i_category] #3 + CometProject [i_item_sk,i_category_id,i_category] + CometFilter [i_item_sk,i_category_id,i_category,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/explain.txt new file mode 100644 index 0000000000..a5550e3d37 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Arguments: [d_date_sk#1, d_year#2, d_day_name#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Arguments: [d_date_sk#1, d_day_name#3], [d_date_sk#1, d_day_name#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_day_name#3] +Right output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_day_name#3, ss_store_sk#4, ss_sales_price#5], [d_day_name#3, ss_store_sk#4, ss_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] + +(10) CometFilter +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) + +(11) CometProject +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9], [s_store_sk#7, s_store_id#8, s_store_name#9] + +(12) CometBroadcastExchange +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Right output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [ss_store_sk#4], [s_store_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9], [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] + +(15) CometHashAggregate +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] + +(16) CometExchange +Input [9]: [s_store_name#9, s_store_id#8, sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [9]: [s_store_name#9, s_store_id#8, sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] + +(18) CometTakeOrderedAndProject +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#9 ASC NULLS FIRST,s_store_id#8 ASC NULLS FIRST,sun_sales#18 ASC NULLS FIRST,mon_sales#19 ASC NULLS FIRST,tue_sales#20 ASC NULLS FIRST,wed_sales#21 ASC NULLS FIRST,thu_sales#22 ASC NULLS FIRST,fri_sales#23 ASC NULLS FIRST,sat_sales#24 ASC NULLS FIRST], output=[s_store_name#9,s_store_id#8,sun_sales#18,mon_sales#19,tue_sales#20,wed_sales#21,thu_sales#22,fri_sales#23,sat_sales#24]), [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24], 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#18 ASC NULLS FIRST, mon_sales#19 ASC NULLS FIRST, tue_sales#20 ASC NULLS FIRST, wed_sales#21 ASC NULLS FIRST, thu_sales#22 ASC NULLS FIRST, fri_sales#23 ASC NULLS FIRST, sat_sales#24 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24] + +(19) ColumnarToRow [codegen id : 1] +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bb39c4233c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + CometHashAggregate [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END))] + CometExchange [s_store_name,s_store_id] #1 + CometHashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum,d_day_name,ss_sales_price] + CometProject [d_day_name,ss_sales_price,s_store_id,s_store_name] + CometBroadcastHashJoin [d_day_name,ss_store_sk,ss_sales_price,s_store_sk,s_store_id,s_store_name] + CometProject [d_day_name,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [d_date_sk,d_day_name,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_day_name] + CometFilter [d_date_sk,d_year,d_day_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_day_name] + CometBroadcastExchange [ss_store_sk,ss_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #3 + CometProject [s_store_sk,s_store_id,s_store_name] + CometFilter [s_store_sk,s_store_id,s_store_name,s_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..db7c6d8c3d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/explain.txt @@ -0,0 +1,111 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.date_dim (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometScan parquet spark_catalog.default.store_sales (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometScan parquet spark_catalog.default.store (9) + + +(1) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Arguments: [d_date_sk#1, d_day_name#3], [d_date_sk#1, d_day_name#3] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_day_name#3] +Right output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_day_name#3, ss_store_sk#4, ss_sales_price#5], [d_day_name#3, ss_store_sk#4, ss_sales_price#5] + +(9) CometScan parquet spark_catalog.default.store +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) + +(11) CometProject +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9], [s_store_sk#7, s_store_id#8, s_store_name#9] + +(12) CometBroadcastExchange +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Right output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [ss_store_sk#4], [s_store_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9], [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] + +(15) CometHashAggregate +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] + +(16) CometExchange +Input [9]: [s_store_name#9, s_store_id#8, sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [9]: [s_store_name#9, s_store_id#8, sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] + +(18) CometTakeOrderedAndProject +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#9 ASC NULLS FIRST,s_store_id#8 ASC NULLS FIRST,sun_sales#18 ASC NULLS FIRST,mon_sales#19 ASC NULLS FIRST,tue_sales#20 ASC NULLS FIRST,wed_sales#21 ASC NULLS FIRST,thu_sales#22 ASC NULLS FIRST,fri_sales#23 ASC NULLS FIRST,sat_sales#24 ASC NULLS FIRST], output=[s_store_name#9,s_store_id#8,sun_sales#18,mon_sales#19,tue_sales#20,wed_sales#21,thu_sales#22,fri_sales#23,sat_sales#24]), [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24], 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#18 ASC NULLS FIRST, mon_sales#19 ASC NULLS FIRST, tue_sales#20 ASC NULLS FIRST, wed_sales#21 ASC NULLS FIRST, thu_sales#22 ASC NULLS FIRST, fri_sales#23 ASC NULLS FIRST, sat_sales#24 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24] + +(19) ColumnarToRow [codegen id : 1] +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#18, mon_sales#19, tue_sales#20, wed_sales#21, thu_sales#22, fri_sales#23, sat_sales#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..447131b776 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q43.native_iceberg_compat/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + CometHashAggregate [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END))] + CometExchange [s_store_name,s_store_id] #1 + CometHashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum,d_day_name,ss_sales_price] + CometProject [d_day_name,ss_sales_price,s_store_id,s_store_name] + CometBroadcastHashJoin [d_day_name,ss_store_sk,ss_sales_price,s_store_sk,s_store_id,s_store_name] + CometProject [d_day_name,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [d_date_sk,d_day_name,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_day_name] + CometFilter [d_date_sk,d_year,d_day_name] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_day_name] + CometBroadcastExchange [ss_store_sk,ss_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #3 + CometProject [s_store_sk,s_store_id,s_store_name] + CometFilter [s_store_sk,s_store_id,s_store_name,s_gmt_offset] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/explain.txt new file mode 100644 index 0000000000..3399127da3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/explain.txt @@ -0,0 +1,261 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * SortMergeJoin Inner (31) + : : :- * Sort (17) + : : : +- * Project (16) + : : : +- * Filter (15) + : : : +- Window (14) + : : : +- WindowGroupLimit (13) + : : : +- * Sort (12) + : : : +- Exchange (11) + : : : +- WindowGroupLimit (10) + : : : +- * ColumnarToRow (9) + : : : +- CometSort (8) + : : : +- CometFilter (7) + : : : +- CometHashAggregate (6) + : : : +- CometExchange (5) + : : : +- CometHashAggregate (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- * Sort (30) + : : +- * Project (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- WindowGroupLimit (26) + : : +- * Sort (25) + : : +- Exchange (24) + : : +- WindowGroupLimit (23) + : : +- * ColumnarToRow (22) + : : +- CometSort (21) + : : +- CometFilter (20) + : : +- CometHashAggregate (19) + : : +- ReusedExchange (18) + : +- BroadcastExchange (36) + : +- * ColumnarToRow (35) + : +- CometFilter (34) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (33) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(3) CometProject +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_net_profit#3], [ss_item_sk#1, ss_net_profit#3] + +(4) CometHashAggregate +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] + +(5) CometExchange +Input [3]: [ss_item_sk#1, sum#5, count#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(6) CometHashAggregate +Input [3]: [ss_item_sk#1, sum#5, count#6] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] + +(7) CometFilter +Input [2]: [item_sk#7, rank_col#8] +Condition : (isnotnull(rank_col#8) AND (cast(rank_col#8 as decimal(13,7)) > (0.9 * Subquery scalar-subquery#9, [id=#10]))) + +(8) CometSort +Input [2]: [item_sk#7, rank_col#8] +Arguments: [item_sk#7, rank_col#8], [rank_col#8 ASC NULLS FIRST] + +(9) ColumnarToRow [codegen id : 1] +Input [2]: [item_sk#7, rank_col#8] + +(10) WindowGroupLimit +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank_col#8 ASC NULLS FIRST], rank(rank_col#8), 10, Partial + +(11) Exchange +Input [2]: [item_sk#7, rank_col#8] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 2] +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank_col#8 ASC NULLS FIRST], false, 0 + +(13) WindowGroupLimit +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank_col#8 ASC NULLS FIRST], rank(rank_col#8), 10, Final + +(14) Window +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank(rank_col#8) windowspecdefinition(rank_col#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#11], [rank_col#8 ASC NULLS FIRST] + +(15) Filter [codegen id : 3] +Input [3]: [item_sk#7, rank_col#8, rnk#11] +Condition : ((rnk#11 < 11) AND isnotnull(item_sk#7)) + +(16) Project [codegen id : 3] +Output [2]: [item_sk#7, rnk#11] +Input [3]: [item_sk#7, rank_col#8, rnk#11] + +(17) Sort [codegen id : 3] +Input [2]: [item_sk#7, rnk#11] +Arguments: [rnk#11 ASC NULLS FIRST], false, 0 + +(18) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_item_sk#12, sum#13, count#14] + +(19) CometHashAggregate +Input [3]: [ss_item_sk#12, sum#13, count#14] +Keys [1]: [ss_item_sk#12] +Functions [1]: [avg(UnscaledValue(ss_net_profit#15))] + +(20) CometFilter +Input [2]: [item_sk#16, rank_col#17] +Condition : (isnotnull(rank_col#17) AND (cast(rank_col#17 as decimal(13,7)) > (0.9 * ReusedSubquery Subquery scalar-subquery#9, [id=#10]))) + +(21) CometSort +Input [2]: [item_sk#16, rank_col#17] +Arguments: [item_sk#16, rank_col#17], [rank_col#17 DESC NULLS LAST] + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [item_sk#16, rank_col#17] + +(23) WindowGroupLimit +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank_col#17 DESC NULLS LAST], rank(rank_col#17), 10, Partial + +(24) Exchange +Input [2]: [item_sk#16, rank_col#17] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(25) Sort [codegen id : 5] +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank_col#17 DESC NULLS LAST], false, 0 + +(26) WindowGroupLimit +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank_col#17 DESC NULLS LAST], rank(rank_col#17), 10, Final + +(27) Window +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#18], [rank_col#17 DESC NULLS LAST] + +(28) Filter [codegen id : 6] +Input [3]: [item_sk#16, rank_col#17, rnk#18] +Condition : ((rnk#18 < 11) AND isnotnull(item_sk#16)) + +(29) Project [codegen id : 6] +Output [2]: [item_sk#16, rnk#18] +Input [3]: [item_sk#16, rank_col#17, rnk#18] + +(30) Sort [codegen id : 6] +Input [2]: [item_sk#16, rnk#18] +Arguments: [rnk#18 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin [codegen id : 9] +Left keys [1]: [rnk#11] +Right keys [1]: [rnk#18] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 9] +Output [3]: [item_sk#7, rnk#11, item_sk#16] +Input [4]: [item_sk#7, rnk#11, item_sk#16, rnk#18] + +(33) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#19, i_product_name#20] +Arguments: [i_item_sk#19, i_product_name#20] + +(34) CometFilter +Input [2]: [i_item_sk#19, i_product_name#20] +Condition : isnotnull(i_item_sk#19) + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [i_item_sk#19, i_product_name#20] + +(36) BroadcastExchange +Input [2]: [i_item_sk#19, i_product_name#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [item_sk#7] +Right keys [1]: [i_item_sk#19] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [rnk#11, item_sk#16, i_product_name#20] +Input [5]: [item_sk#7, rnk#11, item_sk#16, i_item_sk#19, i_product_name#20] + +(39) ReusedExchange [Reuses operator id: 36] +Output [2]: [i_item_sk#21, i_product_name#22] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [item_sk#16] +Right keys [1]: [i_item_sk#21] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 9] +Output [3]: [rnk#11, i_product_name#20 AS best_performing#23, i_product_name#22 AS worst_performing#24] +Input [5]: [rnk#11, item_sk#16, i_product_name#20, i_item_sk#21, i_product_name#22] + +(42) TakeOrderedAndProject +Input [3]: [rnk#11, best_performing#23, worst_performing#24] +Arguments: 100, [rnk#11 ASC NULLS FIRST], [rnk#11, best_performing#23, worst_performing#24] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* ColumnarToRow (49) ++- CometHashAggregate (48) + +- CometExchange (47) + +- CometHashAggregate (46) + +- CometProject (45) + +- CometFilter (44) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (43) + + +(43) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] +Arguments: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] + +(44) CometFilter +Input [4]: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] +Condition : ((isnotnull(ss_store_sk#26) AND (ss_store_sk#26 = 4)) AND isnull(ss_addr_sk#25)) + +(45) CometProject +Input [4]: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] +Arguments: [ss_store_sk#26, ss_net_profit#27], [ss_store_sk#26, ss_net_profit#27] + +(46) CometHashAggregate +Input [2]: [ss_store_sk#26, ss_net_profit#27] +Keys [1]: [ss_store_sk#26] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#27))] + +(47) CometExchange +Input [3]: [ss_store_sk#26, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(48) CometHashAggregate +Input [3]: [ss_store_sk#26, sum#29, count#30] +Keys [1]: [ss_store_sk#26] +Functions [1]: [avg(UnscaledValue(ss_net_profit#27))] + +(49) ColumnarToRow [codegen id : 1] +Input [1]: [rank_col#31] + +Subquery:2 Hosting operator id = 20 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6b6c66a16e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_datafusion/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen (9) + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + SortMergeJoin [rnk,rnk] + InputAdapter + WholeStageCodegen (3) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WindowGroupLimit [rank_col] + WholeStageCodegen (2) + Sort [rank_col] + InputAdapter + Exchange #1 + WindowGroupLimit [rank_col] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item_sk,rank_col] + CometFilter [item_sk,rank_col] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [rank_col,ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] + CometExchange [ss_store_sk] #3 + CometHashAggregate [ss_store_sk,sum,count,ss_net_profit] + CometProject [ss_store_sk,ss_net_profit] + CometFilter [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometHashAggregate [item_sk,rank_col,ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] + CometExchange [ss_item_sk] #2 + CometHashAggregate [ss_item_sk,sum,count,ss_net_profit] + CometProject [ss_item_sk,ss_net_profit] + CometFilter [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (6) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WindowGroupLimit [rank_col] + WholeStageCodegen (5) + Sort [rank_col] + InputAdapter + Exchange #4 + WindowGroupLimit [rank_col] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item_sk,rank_col] + CometFilter [item_sk,rank_col] + ReusedSubquery [rank_col] #1 + CometHashAggregate [item_sk,rank_col,ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] + ReusedExchange [ss_item_sk,sum,count] #2 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometFilter [i_item_sk,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..337a09591b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/explain.txt @@ -0,0 +1,270 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * SortMergeJoin Inner (31) + : : :- * Sort (17) + : : : +- * Project (16) + : : : +- * Filter (15) + : : : +- Window (14) + : : : +- WindowGroupLimit (13) + : : : +- * Sort (12) + : : : +- Exchange (11) + : : : +- WindowGroupLimit (10) + : : : +- * ColumnarToRow (9) + : : : +- CometSort (8) + : : : +- CometFilter (7) + : : : +- CometHashAggregate (6) + : : : +- CometExchange (5) + : : : +- CometHashAggregate (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- * Sort (30) + : : +- * Project (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- WindowGroupLimit (26) + : : +- * Sort (25) + : : +- Exchange (24) + : : +- WindowGroupLimit (23) + : : +- * ColumnarToRow (22) + : : +- CometSort (21) + : : +- CometFilter (20) + : : +- CometHashAggregate (19) + : : +- ReusedExchange (18) + : +- BroadcastExchange (36) + : +- * ColumnarToRow (35) + : +- CometFilter (34) + : +- CometScan parquet spark_catalog.default.item (33) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(3) CometProject +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_net_profit#3], [ss_item_sk#1, ss_net_profit#3] + +(4) CometHashAggregate +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] + +(5) CometExchange +Input [3]: [ss_item_sk#1, sum#5, count#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(6) CometHashAggregate +Input [3]: [ss_item_sk#1, sum#5, count#6] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] + +(7) CometFilter +Input [2]: [item_sk#7, rank_col#8] +Condition : (isnotnull(rank_col#8) AND (cast(rank_col#8 as decimal(13,7)) > (0.9 * Subquery scalar-subquery#9, [id=#10]))) + +(8) CometSort +Input [2]: [item_sk#7, rank_col#8] +Arguments: [item_sk#7, rank_col#8], [rank_col#8 ASC NULLS FIRST] + +(9) ColumnarToRow [codegen id : 1] +Input [2]: [item_sk#7, rank_col#8] + +(10) WindowGroupLimit +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank_col#8 ASC NULLS FIRST], rank(rank_col#8), 10, Partial + +(11) Exchange +Input [2]: [item_sk#7, rank_col#8] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 2] +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank_col#8 ASC NULLS FIRST], false, 0 + +(13) WindowGroupLimit +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank_col#8 ASC NULLS FIRST], rank(rank_col#8), 10, Final + +(14) Window +Input [2]: [item_sk#7, rank_col#8] +Arguments: [rank(rank_col#8) windowspecdefinition(rank_col#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#11], [rank_col#8 ASC NULLS FIRST] + +(15) Filter [codegen id : 3] +Input [3]: [item_sk#7, rank_col#8, rnk#11] +Condition : ((rnk#11 < 11) AND isnotnull(item_sk#7)) + +(16) Project [codegen id : 3] +Output [2]: [item_sk#7, rnk#11] +Input [3]: [item_sk#7, rank_col#8, rnk#11] + +(17) Sort [codegen id : 3] +Input [2]: [item_sk#7, rnk#11] +Arguments: [rnk#11 ASC NULLS FIRST], false, 0 + +(18) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_item_sk#12, sum#13, count#14] + +(19) CometHashAggregate +Input [3]: [ss_item_sk#12, sum#13, count#14] +Keys [1]: [ss_item_sk#12] +Functions [1]: [avg(UnscaledValue(ss_net_profit#15))] + +(20) CometFilter +Input [2]: [item_sk#16, rank_col#17] +Condition : (isnotnull(rank_col#17) AND (cast(rank_col#17 as decimal(13,7)) > (0.9 * ReusedSubquery Subquery scalar-subquery#9, [id=#10]))) + +(21) CometSort +Input [2]: [item_sk#16, rank_col#17] +Arguments: [item_sk#16, rank_col#17], [rank_col#17 DESC NULLS LAST] + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [item_sk#16, rank_col#17] + +(23) WindowGroupLimit +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank_col#17 DESC NULLS LAST], rank(rank_col#17), 10, Partial + +(24) Exchange +Input [2]: [item_sk#16, rank_col#17] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(25) Sort [codegen id : 5] +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank_col#17 DESC NULLS LAST], false, 0 + +(26) WindowGroupLimit +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank_col#17 DESC NULLS LAST], rank(rank_col#17), 10, Final + +(27) Window +Input [2]: [item_sk#16, rank_col#17] +Arguments: [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#18], [rank_col#17 DESC NULLS LAST] + +(28) Filter [codegen id : 6] +Input [3]: [item_sk#16, rank_col#17, rnk#18] +Condition : ((rnk#18 < 11) AND isnotnull(item_sk#16)) + +(29) Project [codegen id : 6] +Output [2]: [item_sk#16, rnk#18] +Input [3]: [item_sk#16, rank_col#17, rnk#18] + +(30) Sort [codegen id : 6] +Input [2]: [item_sk#16, rnk#18] +Arguments: [rnk#18 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin [codegen id : 9] +Left keys [1]: [rnk#11] +Right keys [1]: [rnk#18] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 9] +Output [3]: [item_sk#7, rnk#11, item_sk#16] +Input [4]: [item_sk#7, rnk#11, item_sk#16, rnk#18] + +(33) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#19, i_product_name#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [i_item_sk#19, i_product_name#20] +Condition : isnotnull(i_item_sk#19) + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [i_item_sk#19, i_product_name#20] + +(36) BroadcastExchange +Input [2]: [i_item_sk#19, i_product_name#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [item_sk#7] +Right keys [1]: [i_item_sk#19] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [rnk#11, item_sk#16, i_product_name#20] +Input [5]: [item_sk#7, rnk#11, item_sk#16, i_item_sk#19, i_product_name#20] + +(39) ReusedExchange [Reuses operator id: 36] +Output [2]: [i_item_sk#21, i_product_name#22] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [item_sk#16] +Right keys [1]: [i_item_sk#21] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 9] +Output [3]: [rnk#11, i_product_name#20 AS best_performing#23, i_product_name#22 AS worst_performing#24] +Input [5]: [rnk#11, item_sk#16, i_product_name#20, i_item_sk#21, i_product_name#22] + +(42) TakeOrderedAndProject +Input [3]: [rnk#11, best_performing#23, worst_performing#24] +Arguments: 100, [rnk#11 ASC NULLS FIRST], [rnk#11, best_performing#23, worst_performing#24] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* ColumnarToRow (49) ++- CometHashAggregate (48) + +- CometExchange (47) + +- CometHashAggregate (46) + +- CometProject (45) + +- CometFilter (44) + +- CometScan parquet spark_catalog.default.store_sales (43) + + +(43) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] +ReadSchema: struct + +(44) CometFilter +Input [4]: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] +Condition : ((isnotnull(ss_store_sk#26) AND (ss_store_sk#26 = 4)) AND isnull(ss_addr_sk#25)) + +(45) CometProject +Input [4]: [ss_addr_sk#25, ss_store_sk#26, ss_net_profit#27, ss_sold_date_sk#28] +Arguments: [ss_store_sk#26, ss_net_profit#27], [ss_store_sk#26, ss_net_profit#27] + +(46) CometHashAggregate +Input [2]: [ss_store_sk#26, ss_net_profit#27] +Keys [1]: [ss_store_sk#26] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#27))] + +(47) CometExchange +Input [3]: [ss_store_sk#26, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(48) CometHashAggregate +Input [3]: [ss_store_sk#26, sum#29, count#30] +Keys [1]: [ss_store_sk#26] +Functions [1]: [avg(UnscaledValue(ss_net_profit#27))] + +(49) ColumnarToRow [codegen id : 1] +Input [1]: [rank_col#31] + +Subquery:2 Hosting operator id = 20 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..58687dc84b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q44.native_iceberg_compat/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen (9) + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + SortMergeJoin [rnk,rnk] + InputAdapter + WholeStageCodegen (3) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WindowGroupLimit [rank_col] + WholeStageCodegen (2) + Sort [rank_col] + InputAdapter + Exchange #1 + WindowGroupLimit [rank_col] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item_sk,rank_col] + CometFilter [item_sk,rank_col] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [rank_col,ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] + CometExchange [ss_store_sk] #3 + CometHashAggregate [ss_store_sk,sum,count,ss_net_profit] + CometProject [ss_store_sk,ss_net_profit] + CometFilter [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometHashAggregate [item_sk,rank_col,ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] + CometExchange [ss_item_sk] #2 + CometHashAggregate [ss_item_sk,sum,count,ss_net_profit] + CometProject [ss_item_sk,ss_net_profit] + CometFilter [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (6) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WindowGroupLimit [rank_col] + WholeStageCodegen (5) + Sort [rank_col] + InputAdapter + Exchange #4 + WindowGroupLimit [rank_col] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item_sk,rank_col] + CometFilter [item_sk,rank_col] + ReusedSubquery [rank_col] #1 + CometHashAggregate [item_sk,rank_col,ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] + ReusedExchange [ss_item_sk,sum,count] #2 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometFilter [i_item_sk,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/explain.txt new file mode 100644 index 0000000000..acee604c97 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (30) + :- * ColumnarToRow (24) + : +- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + +- BroadcastExchange (29) + +- * ColumnarToRow (28) + +- CometProject (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`item` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Arguments: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] + +(2) CometFilter +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Condition : (isnotnull(ws_bill_customer_sk#3) AND isnotnull(ws_item_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [c_customer_sk#6, c_current_addr_sk#7] + +(4) CometFilter +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [c_customer_sk#6, c_current_addr_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Right output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [ws_bill_customer_sk#3], [c_customer_sk#6], Inner, BuildRight + +(7) CometProject +Input [6]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5, c_customer_sk#6, c_current_addr_sk#7] +Arguments: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7], [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(9) CometFilter +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Condition : isnotnull(ca_address_sk#8) + +(10) CometBroadcastExchange +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(11) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] +Right output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7, ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10], [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Arguments: [d_date_sk#11, d_year#12, d_qoy#13] + +(14) CometFilter +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(15) CometProject +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Arguments: [d_date_sk#11], [d_date_sk#11] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: [d_date_sk#11] + +(17) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#5], [d_date_sk#11], Inner, BuildRight + +(18) CometProject +Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10, d_date_sk#11] +Arguments: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10], [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#14, i_item_id#15] +Arguments: [i_item_sk#14, i_item_id#15] + +(20) CometFilter +Input [2]: [i_item_sk#14, i_item_id#15] +Condition : isnotnull(i_item_sk#14) + +(21) CometBroadcastExchange +Input [2]: [i_item_sk#14, i_item_id#15] +Arguments: [i_item_sk#14, i_item_id#15] + +(22) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] +Right output [2]: [i_item_sk#14, i_item_id#15] +Arguments: [ws_item_sk#2], [i_item_sk#14], Inner, BuildRight + +(23) CometProject +Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10, i_item_sk#14, i_item_id#15] +Arguments: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15], [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] + +(24) ColumnarToRow [codegen id : 2] +Input [4]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] + +(25) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#16, i_item_id#17] +Arguments: [i_item_sk#16, i_item_id#17] + +(26) CometFilter +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : i_item_sk#16 IN (2,3,5,7,11,13,17,19,23,29) + +(27) CometProject +Input [2]: [i_item_sk#16, i_item_id#17] +Arguments: [i_item_id#17], [i_item_id#17] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_id#17] + +(29) BroadcastExchange +Input [1]: [i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=1] + +(30) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [i_item_id#15] +Right keys [1]: [i_item_id#17] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(31) Filter [codegen id : 2] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] +Condition : (substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(32) Project [codegen id : 2] +Output [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] + +(33) HashAggregate [codegen id : 2] +Input [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ca_zip#10, ca_city#9, sum#19] + +(34) Exchange +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Arguments: hashpartitioning(ca_zip#10, ca_city#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(35) HashAggregate [codegen id : 3] +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#20] +Results [3]: [ca_zip#10, ca_city#9, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#20,17,2) AS sum(ws_sales_price)#21] + +(36) TakeOrderedAndProject +Input [3]: [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] +Arguments: 100, [ca_zip#10 ASC NULLS FIRST, ca_city#9 ASC NULLS FIRST], [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/simplified.txt new file mode 100644 index 0000000000..68d48dbfa2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_datafusion/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen (3) + HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen (2) + HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + ColumnarToRow + InputAdapter + CometProject [ws_sales_price,ca_city,ca_zip,i_item_id] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ca_city,ca_zip,i_item_sk,i_item_id] + CometProject [ws_item_sk,ws_sales_price,ca_city,ca_zip] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip,d_date_sk] + CometProject [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk,ca_address_sk,ca_city,ca_zip] + CometProject [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk,c_customer_sk,c_current_addr_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_city,ca_zip] #3 + CometFilter [ca_address_sk,ca_city,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city,ca_zip] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [i_item_id] + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f52f95c712 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/explain.txt @@ -0,0 +1,212 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (30) + :- * ColumnarToRow (24) + : +- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.customer (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.customer_address (8) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.date_dim (13) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometScan parquet spark_catalog.default.item (19) + +- BroadcastExchange (29) + +- * ColumnarToRow (28) + +- CometProject (27) + +- CometFilter (26) + +- CometScan parquet spark_catalog.default.item (25) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#5)] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Condition : (isnotnull(ws_bill_customer_sk#3) AND isnotnull(ws_item_sk#2)) + +(3) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [c_customer_sk#6, c_current_addr_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Right output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [ws_bill_customer_sk#3], [c_customer_sk#6], Inner, BuildRight + +(7) CometProject +Input [6]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5, c_customer_sk#6, c_current_addr_sk#7] +Arguments: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7], [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] + +(8) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Condition : isnotnull(ca_address_sk#8) + +(10) CometBroadcastExchange +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(11) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] +Right output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7, ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10], [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(15) CometProject +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Arguments: [d_date_sk#11], [d_date_sk#11] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: [d_date_sk#11] + +(17) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#5], [d_date_sk#11], Inner, BuildRight + +(18) CometProject +Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10, d_date_sk#11] +Arguments: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10], [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] + +(19) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#14, i_item_id#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [2]: [i_item_sk#14, i_item_id#15] +Condition : isnotnull(i_item_sk#14) + +(21) CometBroadcastExchange +Input [2]: [i_item_sk#14, i_item_id#15] +Arguments: [i_item_sk#14, i_item_id#15] + +(22) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] +Right output [2]: [i_item_sk#14, i_item_id#15] +Arguments: [ws_item_sk#2], [i_item_sk#14], Inner, BuildRight + +(23) CometProject +Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10, i_item_sk#14, i_item_id#15] +Arguments: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15], [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] + +(24) ColumnarToRow [codegen id : 2] +Input [4]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] + +(25) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#16, i_item_id#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_item_sk, [11,13,17,19,2,23,29,3,5,7])] +ReadSchema: struct + +(26) CometFilter +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : i_item_sk#16 IN (2,3,5,7,11,13,17,19,23,29) + +(27) CometProject +Input [2]: [i_item_sk#16, i_item_id#17] +Arguments: [i_item_id#17], [i_item_id#17] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_id#17] + +(29) BroadcastExchange +Input [1]: [i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=1] + +(30) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [i_item_id#15] +Right keys [1]: [i_item_id#17] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(31) Filter [codegen id : 2] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] +Condition : (substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(32) Project [codegen id : 2] +Output [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] + +(33) HashAggregate [codegen id : 2] +Input [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ca_zip#10, ca_city#9, sum#19] + +(34) Exchange +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Arguments: hashpartitioning(ca_zip#10, ca_city#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(35) HashAggregate [codegen id : 3] +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#20] +Results [3]: [ca_zip#10, ca_city#9, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#20,17,2) AS sum(ws_sales_price)#21] + +(36) TakeOrderedAndProject +Input [3]: [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] +Arguments: 100, [ca_zip#10 ASC NULLS FIRST, ca_city#9 ASC NULLS FIRST], [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..b60311a173 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q45.native_iceberg_compat/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen (3) + HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen (2) + HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + ColumnarToRow + InputAdapter + CometProject [ws_sales_price,ca_city,ca_zip,i_item_id] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ca_city,ca_zip,i_item_sk,i_item_id] + CometProject [ws_item_sk,ws_sales_price,ca_city,ca_zip] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip,d_date_sk] + CometProject [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk,ca_address_sk,ca_city,ca_zip] + CometProject [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk,c_customer_sk,c_current_addr_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_city,ca_zip] #3 + CometFilter [ca_address_sk,ca_city,ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city,ca_zip] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [i_item_id] + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/explain.txt new file mode 100644 index 0000000000..1740385afd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/explain.txt @@ -0,0 +1,199 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometProject (36) + +- CometBroadcastHashJoin (35) + :- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (20) + : : : +- CometBroadcastHashJoin (19) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : : : +- CometBroadcastExchange (18) + : : : +- CometProject (17) + : : : +- CometFilter (16) + : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + : : +- CometBroadcastExchange (23) + : : +- CometFilter (22) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (21) + : +- CometBroadcastExchange (31) + : +- CometFilter (30) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (29) + +- ReusedExchange (34) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9, d_year#10, d_dow#11] + +(4) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(5) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#12, s_city#13] +Arguments: [s_store_sk#12, s_city#13] + +(10) CometFilter +Input [2]: [s_store_sk#12, s_city#13] +Condition : (s_city#13 IN (Fairview,Midway) AND isnotnull(s_store_sk#12)) + +(11) CometProject +Input [2]: [s_store_sk#12, s_city#13] +Arguments: [s_store_sk#12], [s_store_sk#12] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: [s_store_sk#12] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [1]: [s_store_sk#12] +Arguments: [ss_store_sk#4], [s_store_sk#12], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Arguments: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] + +(16) CometFilter +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) + +(17) CometProject +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Arguments: [hd_demo_sk#14], [hd_demo_sk#14] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#14] +Arguments: [hd_demo_sk#14] + +(19) CometBroadcastHashJoin +Left output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [1]: [hd_demo_sk#14] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#14], Inner, BuildRight + +(20) CometProject +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(21) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ca_address_sk#17, ca_city#18] + +(22) CometFilter +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) + +(23) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ca_address_sk#17, ca_city#18] + +(24) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ss_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#17, ca_city#18] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] + +(26) CometHashAggregate +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] + +(27) CometExchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#19, sum#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#19, sum#20] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] + +(29) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] + +(30) CometFilter +Input [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) + +(31) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] + +(32) CometBroadcastHashJoin +Left output [5]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27] +Right output [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [ss_customer_sk#1], [c_customer_sk#21], Inner, BuildRight + +(33) CometProject +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24], [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24] + +(34) ReusedExchange [Reuses operator id: 23] +Output [2]: [ca_address_sk#28, ca_city#29] + +(35) CometBroadcastHashJoin +Left output [7]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Right output [2]: [ca_address_sk#28, ca_city#29] +Arguments: [c_current_addr_sk#22], [ca_address_sk#28], Inner, NOT (ca_city#29 = bought_city#25), BuildRight + +(36) CometProject +Input [9]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24, ca_address_sk#28, ca_city#29] +Arguments: [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27], [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + +(37) CometTakeOrderedAndProject +Input [7]: [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#24 ASC NULLS FIRST,c_first_name#23 ASC NULLS FIRST,ca_city#29 ASC NULLS FIRST,bought_city#25 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#24,c_first_name#23,ca_city#29,bought_city#25,ss_ticket_number#5,amt#26,profit#27]), [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27], 100, [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, ca_city#29 ASC NULLS FIRST, bought_city#25 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + +(38) ColumnarToRow [codegen id : 1] +Input [7]: [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/simplified.txt new file mode 100644 index 0000000000..cf50411300 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + CometProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + CometBroadcastHashJoin [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk,ca_city] + CometProject [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,bought_city,amt,profit,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometHashAggregate [ss_ticket_number,ss_customer_sk,bought_city,amt,profit,ss_addr_sk,ca_city,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + CometExchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + CometHashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,ss_coupon_amt,ss_net_profit] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] + CometBroadcastHashJoin [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_address_sk,ca_city] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dow] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dow] + CometBroadcastExchange [s_store_sk] #3 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_city] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [ca_address_sk,ca_city] #5 + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] #6 + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..907bb19241 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometProject (36) + +- CometBroadcastHashJoin (35) + :- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (20) + : : : +- CometBroadcastHashJoin (19) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometScan parquet spark_catalog.default.store (9) + : : : +- CometBroadcastExchange (18) + : : : +- CometProject (17) + : : : +- CometFilter (16) + : : : +- CometScan parquet spark_catalog.default.household_demographics (15) + : : +- CometBroadcastExchange (23) + : : +- CometFilter (22) + : : +- CometScan parquet spark_catalog.default.customer_address (21) + : +- CometBroadcastExchange (31) + : +- CometFilter (30) + : +- CometScan parquet spark_catalog.default.customer (29) + +- ReusedExchange (34) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(5) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_city#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#12, s_city#13] +Condition : (s_city#13 IN (Fairview,Midway) AND isnotnull(s_store_sk#12)) + +(11) CometProject +Input [2]: [s_store_sk#12, s_city#13] +Arguments: [s_store_sk#12], [s_store_sk#12] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: [s_store_sk#12] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [1]: [s_store_sk#12] +Arguments: [ss_store_sk#4], [s_store_sk#12], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(15) CometScan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) + +(17) CometProject +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Arguments: [hd_demo_sk#14], [hd_demo_sk#14] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#14] +Arguments: [hd_demo_sk#14] + +(19) CometBroadcastHashJoin +Left output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [1]: [hd_demo_sk#14] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#14], Inner, BuildRight + +(20) CometProject +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(21) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_city#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(22) CometFilter +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) + +(23) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ca_address_sk#17, ca_city#18] + +(24) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ss_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#17, ca_city#18] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] + +(26) CometHashAggregate +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] + +(27) CometExchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#19, sum#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#19, sum#20] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] + +(29) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(30) CometFilter +Input [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) + +(31) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] + +(32) CometBroadcastHashJoin +Left output [5]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27] +Right output [4]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [ss_customer_sk#1], [c_customer_sk#21], Inner, BuildRight + +(33) CometProject +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Arguments: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24], [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24] + +(34) ReusedExchange [Reuses operator id: 23] +Output [2]: [ca_address_sk#28, ca_city#29] + +(35) CometBroadcastHashJoin +Left output [7]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24] +Right output [2]: [ca_address_sk#28, ca_city#29] +Arguments: [c_current_addr_sk#22], [ca_address_sk#28], Inner, NOT (ca_city#29 = bought_city#25), BuildRight + +(36) CometProject +Input [9]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#22, c_first_name#23, c_last_name#24, ca_address_sk#28, ca_city#29] +Arguments: [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27], [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + +(37) CometTakeOrderedAndProject +Input [7]: [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#24 ASC NULLS FIRST,c_first_name#23 ASC NULLS FIRST,ca_city#29 ASC NULLS FIRST,bought_city#25 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#24,c_first_name#23,ca_city#29,bought_city#25,ss_ticket_number#5,amt#26,profit#27]), [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27], 100, [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, ca_city#29 ASC NULLS FIRST, bought_city#25 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + +(38) ColumnarToRow [codegen id : 1] +Input [7]: [c_last_name#24, c_first_name#23, ca_city#29, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..04f6b0a121 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q46.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + CometProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + CometBroadcastHashJoin [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk,ca_city] + CometProject [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,bought_city,amt,profit,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometHashAggregate [ss_ticket_number,ss_customer_sk,bought_city,amt,profit,ss_addr_sk,ca_city,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + CometExchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + CometHashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,ss_coupon_amt,ss_net_profit] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] + CometBroadcastHashJoin [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_address_sk,ca_city] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dow] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dow] + CometBroadcastExchange [s_store_sk] #3 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_city] + CometScan parquet spark_catalog.default.store [s_store_sk,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [ca_address_sk,ca_city] #5 + CometFilter [ca_address_sk,ca_city] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] #6 + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/explain.txt new file mode 100644 index 0000000000..02904e4ff1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Arguments: [i_item_sk#1, i_brand#2, i_category#3] + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_item_sk#1], [ss_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(14) CometFilter +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(15) CometBroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Right output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [ss_store_sk#5], [s_store_sk#11], Inner, BuildRight + +(17) CometProject +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13], [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] + +(18) CometHashAggregate +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] + +(21) CometExchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] + +(24) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#17], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Arguments: [avg(_w0#16) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#18], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(27) Filter [codegen id : 7] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] +Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.000000)) AND CASE WHEN (avg_monthly_sales#18 > 0.000000) THEN ((abs((sum_sales#15 - avg_monthly_sales#18)) / avg_monthly_sales#18) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] + +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] + +(30) CometHashAggregate +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] +Keys [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(ss_sales_price#26))] + +(31) CometExchange +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] + +(34) Window +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#15 AS sum_sales#28, rn#27] +Input [8]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15, rn#27] + +(36) BroadcastExchange +Input [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] + +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(40) CometSort +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15], [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(42) Window +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#29, i_brand#30, s_store_name#31, s_company_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#15 AS sum_sales#36, rn#35] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15, rn#35] + +(44) BroadcastExchange +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#35 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, sum_sales#28 AS psum#37, sum_sales#36 AS nsum#38] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28, i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] + +(47) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7594a9aaff --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] + WholeStageCodegen (7) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #1 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum,ss_sales_price] + CometProject [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #7 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f311d1bff0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.item (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.store (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_item_sk#1], [ss_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] + +(13) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(15) CometBroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Right output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [ss_store_sk#5], [s_store_sk#11], Inner, BuildRight + +(17) CometProject +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13], [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] + +(18) CometHashAggregate +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] + +(21) CometExchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] + +(24) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#17], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Arguments: [avg(_w0#16) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#18], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(27) Filter [codegen id : 7] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] +Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.000000)) AND CASE WHEN (avg_monthly_sales#18 > 0.000000) THEN ((abs((sum_sales#15 - avg_monthly_sales#18)) / avg_monthly_sales#18) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] + +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] + +(30) CometHashAggregate +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] +Keys [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(ss_sales_price#26))] + +(31) CometExchange +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] + +(34) Window +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#15 AS sum_sales#28, rn#27] +Input [8]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15, rn#27] + +(36) BroadcastExchange +Input [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] + +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(40) CometSort +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15], [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(42) Window +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#29, i_brand#30, s_store_name#31, s_company_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#15 AS sum_sales#36, rn#35] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15, rn#35] + +(44) BroadcastExchange +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#35 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, sum_sales#28 AS psum#37, sum_sales#36 AS nsum#38] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28, i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] + +(47) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c6a15d2f61 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] + WholeStageCodegen (7) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #1 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum,ss_sales_price] + CometProject [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #7 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/explain.txt new file mode 100644 index 0000000000..ea30295898 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/explain.txt @@ -0,0 +1,148 @@ +== Physical Plan == +* ColumnarToRow (28) ++- CometHashAggregate (27) + +- CometExchange (26) + +- CometHashAggregate (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (13) + +- CometBroadcastExchange (22) + +- CometProject (21) + +- CometFilter (20) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((((isnotnull(ss_store_sk#3) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_addr_sk#2)) AND ((((ss_sales_price#5 >= 100.00) AND (ss_sales_price#5 <= 150.00)) OR ((ss_sales_price#5 >= 50.00) AND (ss_sales_price#5 <= 100.00))) OR ((ss_sales_price#5 >= 150.00) AND (ss_sales_price#5 <= 200.00)))) AND ((((ss_net_profit#6 >= 0.00) AND (ss_net_profit#6 <= 2000.00)) OR ((ss_net_profit#6 >= 150.00) AND (ss_net_profit#6 <= 3000.00))) OR ((ss_net_profit#6 >= 50.00) AND (ss_net_profit#6 <= 25000.00)))) + +(3) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#8] +Arguments: [s_store_sk#8] + +(4) CometFilter +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(5) CometBroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: [s_store_sk#8] + +(6) CometBroadcastHashJoin +Left output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [1]: [s_store_sk#8] +Arguments: [ss_store_sk#3], [s_store_sk#8], Inner, BuildRight + +(7) CometProject +Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#8] +Arguments: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(9) CometFilter +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Condition : (isnotnull(cd_demo_sk#9) AND ((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) OR ((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree ))) OR ((cd_marital_status#10 = S) AND (cd_education_status#11 = College )))) + +(10) CometBroadcastExchange +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(11) CometBroadcastHashJoin +Left output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [ss_cdemo_sk#1], [cd_demo_sk#9], Inner, ((((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#10 = S) AND (cd_education_status#11 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))), BuildRight + +(12) CometProject +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7], [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13, ca_country#14] + +(14) CometFilter +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (CO,OH,TX) OR ca_state#13 IN (OR,MN,KY)) OR ca_state#13 IN (VA,CA,MS))) + +(15) CometProject +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13], [ca_address_sk#12, ca_state#13] + +(16) CometBroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ca_address_sk#12, ca_state#13] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] +Right output [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ss_addr_sk#2], [ca_address_sk#12], Inner, ((((ca_state#13 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#13 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#13 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))), BuildRight + +(18) CometProject +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#12, ca_state#13] +Arguments: [ss_quantity#4, ss_sold_date_sk#7], [ss_quantity#4, ss_sold_date_sk#7] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(20) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(21) CometProject +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15], [d_date_sk#15] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: [d_date_sk#15] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_quantity#4, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#15] +Arguments: [ss_sold_date_sk#7], [d_date_sk#15], Inner, BuildRight + +(24) CometProject +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#15] +Arguments: [ss_quantity#4], [ss_quantity#4] + +(25) CometHashAggregate +Input [1]: [ss_quantity#4] +Keys: [] +Functions [1]: [partial_sum(ss_quantity#4)] + +(26) CometExchange +Input [1]: [sum#17] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [1]: [sum#17] +Keys: [] +Functions [1]: [sum(ss_quantity#4)] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [sum(ss_quantity)#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/simplified.txt new file mode 100644 index 0000000000..aea9aa7cf6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_datafusion/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [sum(ss_quantity),sum,sum(ss_quantity)] + CometExchange #1 + CometHashAggregate [sum,ss_quantity] + CometProject [ss_quantity] + CometBroadcastHashJoin [ss_quantity,ss_sold_date_sk,d_date_sk] + CometProject [ss_quantity,ss_sold_date_sk] + CometBroadcastHashJoin [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk,ca_address_sk,ca_state] + CometProject [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk,s_store_sk] + CometFilter [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #3 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [ca_address_sk,ca_state] #4 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..bd7ffa3f59 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/explain.txt @@ -0,0 +1,164 @@ +== Physical Plan == +* ColumnarToRow (28) ++- CometHashAggregate (27) + +- CometExchange (26) + +- CometHashAggregate (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.store (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.customer_demographics (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.customer_address (13) + +- CometBroadcastExchange (22) + +- CometProject (21) + +- CometFilter (20) + +- CometScan parquet spark_catalog.default.date_dim (19) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((((isnotnull(ss_store_sk#3) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_addr_sk#2)) AND ((((ss_sales_price#5 >= 100.00) AND (ss_sales_price#5 <= 150.00)) OR ((ss_sales_price#5 >= 50.00) AND (ss_sales_price#5 <= 100.00))) OR ((ss_sales_price#5 >= 150.00) AND (ss_sales_price#5 <= 200.00)))) AND ((((ss_net_profit#6 >= 0.00) AND (ss_net_profit#6 <= 2000.00)) OR ((ss_net_profit#6 >= 150.00) AND (ss_net_profit#6 <= 3000.00))) OR ((ss_net_profit#6 >= 50.00) AND (ss_net_profit#6 <= 25000.00)))) + +(3) CometScan parquet spark_catalog.default.store +Output [1]: [s_store_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(4) CometFilter +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(5) CometBroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: [s_store_sk#8] + +(6) CometBroadcastHashJoin +Left output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [1]: [s_store_sk#8] +Arguments: [ss_store_sk#3], [s_store_sk#8], Inner, BuildRight + +(7) CometProject +Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#8] +Arguments: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree )),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree ))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College )))] +ReadSchema: struct + +(9) CometFilter +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Condition : (isnotnull(cd_demo_sk#9) AND ((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) OR ((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree ))) OR ((cd_marital_status#10 = S) AND (cd_education_status#11 = College )))) + +(10) CometBroadcastExchange +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(11) CometBroadcastHashJoin +Left output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [ss_cdemo_sk#1], [cd_demo_sk#9], Inner, ((((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#10 = S) AND (cd_education_status#11 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))), BuildRight + +(12) CometProject +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7], [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] + +(13) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [KY,MN,OR])),In(ca_state, [CA,MS,VA]))] +ReadSchema: struct + +(14) CometFilter +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (CO,OH,TX) OR ca_state#13 IN (OR,MN,KY)) OR ca_state#13 IN (VA,CA,MS))) + +(15) CometProject +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13], [ca_address_sk#12, ca_state#13] + +(16) CometBroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ca_address_sk#12, ca_state#13] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] +Right output [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ss_addr_sk#2], [ca_address_sk#12], Inner, ((((ca_state#13 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#13 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#13 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))), BuildRight + +(18) CometProject +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#12, ca_state#13] +Arguments: [ss_quantity#4, ss_sold_date_sk#7], [ss_quantity#4, ss_sold_date_sk#7] + +(19) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(20) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(21) CometProject +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15], [d_date_sk#15] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: [d_date_sk#15] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_quantity#4, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#15] +Arguments: [ss_sold_date_sk#7], [d_date_sk#15], Inner, BuildRight + +(24) CometProject +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#15] +Arguments: [ss_quantity#4], [ss_quantity#4] + +(25) CometHashAggregate +Input [1]: [ss_quantity#4] +Keys: [] +Functions [1]: [partial_sum(ss_quantity#4)] + +(26) CometExchange +Input [1]: [sum#17] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [1]: [sum#17] +Keys: [] +Functions [1]: [sum(ss_quantity#4)] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [sum(ss_quantity)#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3fafbd1e82 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q48.native_iceberg_compat/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [sum(ss_quantity),sum,sum(ss_quantity)] + CometExchange #1 + CometHashAggregate [sum,ss_quantity] + CometProject [ss_quantity] + CometBroadcastHashJoin [ss_quantity,ss_sold_date_sk,d_date_sk] + CometProject [ss_quantity,ss_sold_date_sk] + CometBroadcastHashJoin [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk,ca_address_sk,ca_state] + CometProject [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk,s_store_sk] + CometFilter [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometFilter [s_store_sk] + CometScan parquet spark_catalog.default.store [s_store_sk] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #3 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [ca_address_sk,ca_state] #4 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/explain.txt new file mode 100644 index 0000000000..33be1ecefe --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- Union (43) + :- * Project (26) + : +- * Filter (25) + : +- Window (24) + : +- * Sort (23) + : +- Window (22) + : +- * ColumnarToRow (21) + : +- CometSort (20) + : +- CometExchange (19) + : +- CometHashAggregate (18) + : +- CometExchange (17) + : +- CometHashAggregate (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometBroadcastExchange (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (5) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + :- * Project (34) + : +- * Filter (33) + : +- Window (32) + : +- * Sort (31) + : +- Window (30) + : +- * ColumnarToRow (29) + : +- CometSort (28) + : +- ReusedExchange (27) + +- * Project (42) + +- * Filter (41) + +- Window (40) + +- * Sort (39) + +- Window (38) + +- * ColumnarToRow (37) + +- CometSort (36) + +- ReusedExchange (35) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(3) CometProject +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6], [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(4) CometBroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(5) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(6) CometFilter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(7) CometProject +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10], [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(8) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Right output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_order_number#2, ws_item_sk#1], [wr_order_number#8, wr_item_sk#7], Inner, BuildLeft + +(9) CometProject +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12, d_year#13, d_moy#14] + +(11) CometFilter +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(12) CometProject +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(14) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Right output [1]: [d_date_sk#12] +Arguments: [ws_sold_date_sk#6], [d_date_sk#12], Inner, BuildRight + +(15) CometProject +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] + +(16) CometHashAggregate +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(17) CometExchange +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(19) CometExchange +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [item#21, return_ratio#22, currency_ratio#23], [return_ratio#22 ASC NULLS FIRST] + +(21) ColumnarToRow [codegen id : 1] +Input [3]: [item#21, return_ratio#22, currency_ratio#23] + +(22) Window +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#24], [return_ratio#22 ASC NULLS FIRST] + +(23) Sort [codegen id : 2] +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [currency_ratio#23 ASC NULLS FIRST], false, 0 + +(24) Window +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [rank(currency_ratio#23) windowspecdefinition(currency_ratio#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#25], [currency_ratio#23 ASC NULLS FIRST] + +(25) Filter [codegen id : 3] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] +Condition : ((return_rank#24 <= 10) OR (currency_rank#25 <= 10)) + +(26) Project [codegen id : 3] +Output [5]: [web AS channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] + +(27) ReusedExchange [Reuses operator id: 19] +Output [3]: [item#27, return_ratio#28, currency_ratio#29] + +(28) CometSort +Input [3]: [item#27, return_ratio#28, currency_ratio#29] +Arguments: [item#27, return_ratio#28, currency_ratio#29], [return_ratio#28 ASC NULLS FIRST] + +(29) ColumnarToRow [codegen id : 4] +Input [3]: [item#27, return_ratio#28, currency_ratio#29] + +(30) Window +Input [3]: [item#27, return_ratio#28, currency_ratio#29] +Arguments: [rank(return_ratio#28) windowspecdefinition(return_ratio#28 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#30], [return_ratio#28 ASC NULLS FIRST] + +(31) Sort [codegen id : 5] +Input [4]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30] +Arguments: [currency_ratio#29 ASC NULLS FIRST], false, 0 + +(32) Window +Input [4]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30] +Arguments: [rank(currency_ratio#29) windowspecdefinition(currency_ratio#29 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#31], [currency_ratio#29 ASC NULLS FIRST] + +(33) Filter [codegen id : 6] +Input [5]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30, currency_rank#31] +Condition : ((return_rank#30 <= 10) OR (currency_rank#31 <= 10)) + +(34) Project [codegen id : 6] +Output [5]: [catalog AS channel#32, item#27, return_ratio#28, return_rank#30, currency_rank#31] +Input [5]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30, currency_rank#31] + +(35) ReusedExchange [Reuses operator id: 19] +Output [3]: [item#33, return_ratio#34, currency_ratio#35] + +(36) CometSort +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [item#33, return_ratio#34, currency_ratio#35], [return_ratio#34 ASC NULLS FIRST] + +(37) ColumnarToRow [codegen id : 7] +Input [3]: [item#33, return_ratio#34, currency_ratio#35] + +(38) Window +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#36], [return_ratio#34 ASC NULLS FIRST] + +(39) Sort [codegen id : 8] +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36] +Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 + +(40) Window +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36] +Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#37], [currency_ratio#35 ASC NULLS FIRST] + +(41) Filter [codegen id : 9] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36, currency_rank#37] +Condition : ((return_rank#36 <= 10) OR (currency_rank#37 <= 10)) + +(42) Project [codegen id : 9] +Output [5]: [store AS channel#38, item#33, return_ratio#34, return_rank#36, currency_rank#37] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36, currency_rank#37] + +(43) Union + +(44) HashAggregate [codegen id : 10] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(45) Exchange +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: hashpartitioning(channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(46) HashAggregate [codegen id : 11] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(47) TakeOrderedAndProject +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: 100, [channel#26 ASC NULLS FIRST, return_rank#24 ASC NULLS FIRST, currency_rank#25 ASC NULLS FIRST], [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/simplified.txt new file mode 100644 index 0000000000..baf07c5c9a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_datafusion/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (11) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (10) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (3) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (2) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #2 + CometHashAggregate [item,return_ratio,currency_ratio,ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + CometExchange [ws_item_sk] #3 + CometHashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt,d_date_sk] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometBroadcastExchange [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] #4 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + WholeStageCodegen (6) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (5) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + ReusedExchange [item,return_ratio,currency_ratio] #2 + WholeStageCodegen (9) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (8) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + ReusedExchange [item,return_ratio,currency_ratio] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..92b770e449 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- * HashAggregate (76) + +- Exchange (75) + +- * HashAggregate (74) + +- Union (73) + :- * Project (26) + : +- * Filter (25) + : +- Window (24) + : +- * Sort (23) + : +- Window (22) + : +- * ColumnarToRow (21) + : +- CometSort (20) + : +- CometExchange (19) + : +- CometHashAggregate (18) + : +- CometExchange (17) + : +- CometHashAggregate (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometBroadcastExchange (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometScan parquet spark_catalog.default.web_returns (5) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometScan parquet spark_catalog.default.date_dim (10) + :- * Project (49) + : +- * Filter (48) + : +- Window (47) + : +- * Sort (46) + : +- Window (45) + : +- * ColumnarToRow (44) + : +- CometSort (43) + : +- CometExchange (42) + : +- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometBroadcastExchange (30) + : : : +- CometProject (29) + : : : +- CometFilter (28) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (27) + : : +- CometProject (33) + : : +- CometFilter (32) + : : +- CometScan parquet spark_catalog.default.catalog_returns (31) + : +- ReusedExchange (36) + +- * Project (72) + +- * Filter (71) + +- Window (70) + +- * Sort (69) + +- Window (68) + +- * ColumnarToRow (67) + +- CometSort (66) + +- CometExchange (65) + +- CometHashAggregate (64) + +- CometExchange (63) + +- CometHashAggregate (62) + +- CometProject (61) + +- CometBroadcastHashJoin (60) + :- CometProject (58) + : +- CometBroadcastHashJoin (57) + : :- CometBroadcastExchange (53) + : : +- CometProject (52) + : : +- CometFilter (51) + : : +- CometScan parquet spark_catalog.default.store_sales (50) + : +- CometProject (56) + : +- CometFilter (55) + : +- CometScan parquet spark_catalog.default.store_returns (54) + +- ReusedExchange (59) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#6)] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(3) CometProject +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6], [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(4) CometBroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(5) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(7) CometProject +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10], [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(8) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Right output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_order_number#2, ws_item_sk#1], [wr_order_number#8, wr_item_sk#7], Inner, BuildLeft + +(9) CometProject +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] + +(10) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) CometFilter +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(12) CometProject +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(14) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Right output [1]: [d_date_sk#12] +Arguments: [ws_sold_date_sk#6], [d_date_sk#12], Inner, BuildRight + +(15) CometProject +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] + +(16) CometHashAggregate +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(17) CometExchange +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(19) CometExchange +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [item#21, return_ratio#22, currency_ratio#23], [return_ratio#22 ASC NULLS FIRST] + +(21) ColumnarToRow [codegen id : 1] +Input [3]: [item#21, return_ratio#22, currency_ratio#23] + +(22) Window +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#24], [return_ratio#22 ASC NULLS FIRST] + +(23) Sort [codegen id : 2] +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [currency_ratio#23 ASC NULLS FIRST], false, 0 + +(24) Window +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [rank(currency_ratio#23) windowspecdefinition(currency_ratio#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#25], [currency_ratio#23 ASC NULLS FIRST] + +(25) Filter [codegen id : 3] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] +Condition : ((return_rank#24 <= 10) OR (currency_rank#25 <= 10)) + +(26) Project [codegen id : 3] +Output [5]: [web AS channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] + +(27) CometScan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_net_profit#31, cs_sold_date_sk#32] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#32)] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(28) CometFilter +Input [6]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_net_profit#31, cs_sold_date_sk#32] +Condition : (((((((isnotnull(cs_net_profit#31) AND isnotnull(cs_net_paid#30)) AND isnotnull(cs_quantity#29)) AND (cs_net_profit#31 > 1.00)) AND (cs_net_paid#30 > 0.00)) AND (cs_quantity#29 > 0)) AND isnotnull(cs_order_number#28)) AND isnotnull(cs_item_sk#27)) + +(29) CometProject +Input [6]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_net_profit#31, cs_sold_date_sk#32] +Arguments: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32], [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] + +(30) CometBroadcastExchange +Input [5]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] +Arguments: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] + +(31) CometScan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36, cr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(32) CometFilter +Input [5]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36, cr_returned_date_sk#37] +Condition : (((isnotnull(cr_return_amount#36) AND (cr_return_amount#36 > 10000.00)) AND isnotnull(cr_order_number#34)) AND isnotnull(cr_item_sk#33)) + +(33) CometProject +Input [5]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36, cr_returned_date_sk#37] +Arguments: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36], [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36] + +(34) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] +Right output [4]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36] +Arguments: [cs_order_number#28, cs_item_sk#27], [cr_order_number#34, cr_item_sk#33], Inner, BuildLeft + +(35) CometProject +Input [9]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36] +Arguments: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36], [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36] + +(36) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#38] + +(37) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36] +Right output [1]: [d_date_sk#38] +Arguments: [cs_sold_date_sk#32], [d_date_sk#38], Inner, BuildRight + +(38) CometProject +Input [7]: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36, d_date_sk#38] +Arguments: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cr_return_quantity#35, cr_return_amount#36], [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cr_return_quantity#35, cr_return_amount#36] + +(39) CometHashAggregate +Input [5]: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cr_return_quantity#35, cr_return_amount#36] +Keys [1]: [cs_item_sk#27] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#35, 0)), partial_sum(coalesce(cs_quantity#29, 0)), partial_sum(coalesce(cast(cr_return_amount#36 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))] + +(40) CometExchange +Input [7]: [cs_item_sk#27, sum#39, sum#40, sum#41, isEmpty#42, sum#43, isEmpty#44] +Arguments: hashpartitioning(cs_item_sk#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(41) CometHashAggregate +Input [7]: [cs_item_sk#27, sum#39, sum#40, sum#41, isEmpty#42, sum#43, isEmpty#44] +Keys [1]: [cs_item_sk#27] +Functions [4]: [sum(coalesce(cr_return_quantity#35, 0)), sum(coalesce(cs_quantity#29, 0)), sum(coalesce(cast(cr_return_amount#36 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))] + +(42) CometExchange +Input [3]: [item#45, return_ratio#46, currency_ratio#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(43) CometSort +Input [3]: [item#45, return_ratio#46, currency_ratio#47] +Arguments: [item#45, return_ratio#46, currency_ratio#47], [return_ratio#46 ASC NULLS FIRST] + +(44) ColumnarToRow [codegen id : 4] +Input [3]: [item#45, return_ratio#46, currency_ratio#47] + +(45) Window +Input [3]: [item#45, return_ratio#46, currency_ratio#47] +Arguments: [rank(return_ratio#46) windowspecdefinition(return_ratio#46 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#48], [return_ratio#46 ASC NULLS FIRST] + +(46) Sort [codegen id : 5] +Input [4]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48] +Arguments: [currency_ratio#47 ASC NULLS FIRST], false, 0 + +(47) Window +Input [4]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48] +Arguments: [rank(currency_ratio#47) windowspecdefinition(currency_ratio#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#49], [currency_ratio#47 ASC NULLS FIRST] + +(48) Filter [codegen id : 6] +Input [5]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48, currency_rank#49] +Condition : ((return_rank#48 <= 10) OR (currency_rank#49 <= 10)) + +(49) Project [codegen id : 6] +Output [5]: [catalog AS channel#50, item#45, return_ratio#46, return_rank#48, currency_rank#49] +Input [5]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48, currency_rank#49] + +(50) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_net_profit#55, ss_sold_date_sk#56] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#56)] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(51) CometFilter +Input [6]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_net_profit#55, ss_sold_date_sk#56] +Condition : (((((((isnotnull(ss_net_profit#55) AND isnotnull(ss_net_paid#54)) AND isnotnull(ss_quantity#53)) AND (ss_net_profit#55 > 1.00)) AND (ss_net_paid#54 > 0.00)) AND (ss_quantity#53 > 0)) AND isnotnull(ss_ticket_number#52)) AND isnotnull(ss_item_sk#51)) + +(52) CometProject +Input [6]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_net_profit#55, ss_sold_date_sk#56] +Arguments: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56], [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] + +(53) CometBroadcastExchange +Input [5]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] +Arguments: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] + +(54) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60, sr_returned_date_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(55) CometFilter +Input [5]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60, sr_returned_date_sk#61] +Condition : (((isnotnull(sr_return_amt#60) AND (sr_return_amt#60 > 10000.00)) AND isnotnull(sr_ticket_number#58)) AND isnotnull(sr_item_sk#57)) + +(56) CometProject +Input [5]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60, sr_returned_date_sk#61] +Arguments: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60], [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60] + +(57) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] +Right output [4]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60] +Arguments: [ss_ticket_number#52, ss_item_sk#51], [sr_ticket_number#58, sr_item_sk#57], Inner, BuildLeft + +(58) CometProject +Input [9]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60] +Arguments: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60], [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60] + +(59) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#62] + +(60) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60] +Right output [1]: [d_date_sk#62] +Arguments: [ss_sold_date_sk#56], [d_date_sk#62], Inner, BuildRight + +(61) CometProject +Input [7]: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60, d_date_sk#62] +Arguments: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, sr_return_quantity#59, sr_return_amt#60], [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, sr_return_quantity#59, sr_return_amt#60] + +(62) CometHashAggregate +Input [5]: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, sr_return_quantity#59, sr_return_amt#60] +Keys [1]: [ss_item_sk#51] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#59, 0)), partial_sum(coalesce(ss_quantity#53, 0)), partial_sum(coalesce(cast(sr_return_amt#60 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#54 as decimal(12,2)), 0.00))] + +(63) CometExchange +Input [7]: [ss_item_sk#51, sum#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Arguments: hashpartitioning(ss_item_sk#51, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(64) CometHashAggregate +Input [7]: [ss_item_sk#51, sum#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [ss_item_sk#51] +Functions [4]: [sum(coalesce(sr_return_quantity#59, 0)), sum(coalesce(ss_quantity#53, 0)), sum(coalesce(cast(sr_return_amt#60 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#54 as decimal(12,2)), 0.00))] + +(65) CometExchange +Input [3]: [item#69, return_ratio#70, currency_ratio#71] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(66) CometSort +Input [3]: [item#69, return_ratio#70, currency_ratio#71] +Arguments: [item#69, return_ratio#70, currency_ratio#71], [return_ratio#70 ASC NULLS FIRST] + +(67) ColumnarToRow [codegen id : 7] +Input [3]: [item#69, return_ratio#70, currency_ratio#71] + +(68) Window +Input [3]: [item#69, return_ratio#70, currency_ratio#71] +Arguments: [rank(return_ratio#70) windowspecdefinition(return_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#72], [return_ratio#70 ASC NULLS FIRST] + +(69) Sort [codegen id : 8] +Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72] +Arguments: [currency_ratio#71 ASC NULLS FIRST], false, 0 + +(70) Window +Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72] +Arguments: [rank(currency_ratio#71) windowspecdefinition(currency_ratio#71 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#73], [currency_ratio#71 ASC NULLS FIRST] + +(71) Filter [codegen id : 9] +Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72, currency_rank#73] +Condition : ((return_rank#72 <= 10) OR (currency_rank#73 <= 10)) + +(72) Project [codegen id : 9] +Output [5]: [store AS channel#74, item#69, return_ratio#70, return_rank#72, currency_rank#73] +Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72, currency_rank#73] + +(73) Union + +(74) HashAggregate [codegen id : 10] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(75) Exchange +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: hashpartitioning(channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(76) HashAggregate [codegen id : 11] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(77) TakeOrderedAndProject +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: 100, [channel#26 ASC NULLS FIRST, return_rank#24 ASC NULLS FIRST, currency_rank#25 ASC NULLS FIRST], [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..af903c445b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q49.native_iceberg_compat/simplified.txt @@ -0,0 +1,99 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (11) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (10) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (3) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (2) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #2 + CometHashAggregate [item,return_ratio,currency_ratio,ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + CometExchange [ws_item_sk] #3 + CometHashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt,d_date_sk] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometBroadcastExchange [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] #4 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (6) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (5) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #6 + CometHashAggregate [item,return_ratio,currency_ratio,cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(cr_return_quantity, 0)),sum(coalesce(cs_quantity, 0)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + CometExchange [cs_item_sk] #7 + CometHashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] + CometProject [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometBroadcastExchange [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] #8 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (9) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (8) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #9 + CometHashAggregate [item,return_ratio,currency_ratio,ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(sr_return_quantity, 0)),sum(coalesce(ss_quantity, 0)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + CometExchange [ss_item_sk] #10 + CometHashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] + CometProject [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt,d_date_sk] + CometProject [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometBroadcastExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] #11 + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/explain.txt new file mode 100644 index 0000000000..61caf7d64e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/explain.txt @@ -0,0 +1,277 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Expand (48) + +- Union (47) + :- * HashAggregate (22) + : +- * ColumnarToRow (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometUnion (7) + : : : :- CometProject (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (4) + : : +- CometBroadcastExchange (11) + : : +- CometProject (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + :- * HashAggregate (25) + : +- * ColumnarToRow (24) + : +- ReusedExchange (23) + +- * HashAggregate (46) + +- * ColumnarToRow (45) + +- CometExchange (44) + +- CometHashAggregate (43) + +- CometProject (42) + +- CometBroadcastHashJoin (41) + :- CometProject (39) + : +- CometBroadcastHashJoin (38) + : :- CometUnion (36) + : : :- CometProject (28) + : : : +- CometFilter (27) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (26) + : : +- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometBroadcastExchange (30) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (29) + : : +- CometProject (33) + : : +- CometFilter (32) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (31) + : +- ReusedExchange (37) + +- ReusedExchange (40) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10], [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(5) CometFilter +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(6) CometProject +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20], [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] + +(7) CometUnion +Child 0 Input [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Child 1 Input [6]: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21, d_date#22] + +(9) CometFilter +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-08-23)) AND (d_date#22 <= 2000-09-06)) AND isnotnull(d_date_sk#21)) + +(10) CometProject +Input [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(12) CometBroadcastHashJoin +Left output [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [1]: [d_date_sk#21] +Arguments: [date_sk#6], [d_date_sk#21], Inner, BuildRight + +(13) CometProject +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +Arguments: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10], [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(15) CometFilter +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(16) CometBroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(17) CometBroadcastHashJoin +Left output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [store_sk#5], [s_store_sk#23], Inner, BuildRight + +(18) CometProject +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] +Arguments: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24], [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] + +(19) CometHashAggregate +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] + +(20) CometExchange +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] + +(22) HashAggregate [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#29, sum(UnscaledValue(return_amt#9))#30, sum(UnscaledValue(profit#8))#31, sum(UnscaledValue(net_loss#10))#32] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#29,17,2) AS sales#33, MakeDecimal(sum(UnscaledValue(return_amt#9))#30,17,2) AS returns#34, (MakeDecimal(sum(UnscaledValue(profit#8))#31,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#32,17,2)) AS profit#35, store channel AS channel#36, concat(store, s_store_id#24) AS id#37] + +(23) ReusedExchange [Reuses operator id: 20] +Output [5]: [cp_catalog_page_id#38, sum#39, sum#40, sum#41, sum#42] + +(24) ColumnarToRow [codegen id : 2] +Input [5]: [cp_catalog_page_id#38, sum#39, sum#40, sum#41, sum#42] + +(25) HashAggregate [codegen id : 2] +Input [5]: [cp_catalog_page_id#38, sum#39, sum#40, sum#41, sum#42] +Keys [1]: [cp_catalog_page_id#38] +Functions [4]: [sum(UnscaledValue(sales_price#43)), sum(UnscaledValue(return_amt#44)), sum(UnscaledValue(profit#45)), sum(UnscaledValue(net_loss#46))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#43))#47, sum(UnscaledValue(return_amt#44))#48, sum(UnscaledValue(profit#45))#49, sum(UnscaledValue(net_loss#46))#50] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#43))#47,17,2) AS sales#51, MakeDecimal(sum(UnscaledValue(return_amt#44))#48,17,2) AS returns#52, (MakeDecimal(sum(UnscaledValue(profit#45))#49,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#46))#50,17,2)) AS profit#53, catalog channel AS channel#54, concat(catalog_page, cp_catalog_page_id#38) AS id#55] + +(26) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] +Arguments: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] + +(27) CometFilter +Input [4]: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] +Condition : isnotnull(ws_web_site_sk#56) + +(28) CometProject +Input [4]: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] +Arguments: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65], [ws_web_site_sk#56 AS wsr_web_site_sk#60, ws_sold_date_sk#59 AS date_sk#61, ws_ext_sales_price#57 AS sales_price#62, ws_net_profit#58 AS profit#63, 0.00 AS return_amt#64, 0.00 AS net_loss#65] + +(29) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [5]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] +Arguments: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] + +(30) CometBroadcastExchange +Input [5]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] +Arguments: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] + +(31) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] +Arguments: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] + +(32) CometFilter +Input [4]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] +Condition : ((isnotnull(ws_item_sk#71) AND isnotnull(ws_order_number#73)) AND isnotnull(ws_web_site_sk#72)) + +(33) CometProject +Input [4]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] +Arguments: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73], [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73] + +(34) CometBroadcastHashJoin +Left output [5]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] +Right output [3]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73] +Arguments: [wr_item_sk#66, wr_order_number#67], [ws_item_sk#71, ws_order_number#73], Inner, BuildLeft + +(35) CometProject +Input [8]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70, ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73] +Arguments: [wsr_web_site_sk#75, date_sk#76, sales_price#77, profit#78, return_amt#79, net_loss#80], [ws_web_site_sk#72 AS wsr_web_site_sk#75, wr_returned_date_sk#70 AS date_sk#76, 0.00 AS sales_price#77, 0.00 AS profit#78, wr_return_amt#68 AS return_amt#79, wr_net_loss#69 AS net_loss#80] + +(36) CometUnion +Child 0 Input [6]: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65] +Child 1 Input [6]: [wsr_web_site_sk#75, date_sk#76, sales_price#77, profit#78, return_amt#79, net_loss#80] + +(37) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#81] + +(38) CometBroadcastHashJoin +Left output [6]: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65] +Right output [1]: [d_date_sk#81] +Arguments: [date_sk#61], [d_date_sk#81], Inner, BuildRight + +(39) CometProject +Input [7]: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65, d_date_sk#81] +Arguments: [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65], [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65] + +(40) ReusedExchange [Reuses operator id: 16] +Output [2]: [web_site_sk#82, web_site_id#83] + +(41) CometBroadcastHashJoin +Left output [5]: [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65] +Right output [2]: [web_site_sk#82, web_site_id#83] +Arguments: [wsr_web_site_sk#60], [web_site_sk#82], Inner, BuildRight + +(42) CometProject +Input [7]: [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_sk#82, web_site_id#83] +Arguments: [sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_id#83], [sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_id#83] + +(43) CometHashAggregate +Input [5]: [sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_id#83] +Keys [1]: [web_site_id#83] +Functions [4]: [partial_sum(UnscaledValue(sales_price#62)), partial_sum(UnscaledValue(return_amt#64)), partial_sum(UnscaledValue(profit#63)), partial_sum(UnscaledValue(net_loss#65))] + +(44) CometExchange +Input [5]: [web_site_id#83, sum#84, sum#85, sum#86, sum#87] +Arguments: hashpartitioning(web_site_id#83, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(45) ColumnarToRow [codegen id : 3] +Input [5]: [web_site_id#83, sum#84, sum#85, sum#86, sum#87] + +(46) HashAggregate [codegen id : 3] +Input [5]: [web_site_id#83, sum#84, sum#85, sum#86, sum#87] +Keys [1]: [web_site_id#83] +Functions [4]: [sum(UnscaledValue(sales_price#62)), sum(UnscaledValue(return_amt#64)), sum(UnscaledValue(profit#63)), sum(UnscaledValue(net_loss#65))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#62))#88, sum(UnscaledValue(return_amt#64))#89, sum(UnscaledValue(profit#63))#90, sum(UnscaledValue(net_loss#65))#91] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#62))#88,17,2) AS sales#92, MakeDecimal(sum(UnscaledValue(return_amt#64))#89,17,2) AS returns#93, (MakeDecimal(sum(UnscaledValue(profit#63))#90,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#65))#91,17,2)) AS profit#94, web channel AS channel#95, concat(web_site, web_site_id#83) AS id#96] + +(47) Union + +(48) Expand [codegen id : 4] +Input [5]: [sales#33, returns#34, profit#35, channel#36, id#37] +Arguments: [[sales#33, returns#34, profit#35, channel#36, id#37, 0], [sales#33, returns#34, profit#35, channel#36, null, 1], [sales#33, returns#34, profit#35, null, null, 3]], [sales#33, returns#34, profit#35, channel#97, id#98, spark_grouping_id#99] + +(49) HashAggregate [codegen id : 4] +Input [6]: [sales#33, returns#34, profit#35, channel#97, id#98, spark_grouping_id#99] +Keys [3]: [channel#97, id#98, spark_grouping_id#99] +Functions [3]: [partial_sum(sales#33), partial_sum(returns#34), partial_sum(profit#35)] +Aggregate Attributes [6]: [sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Results [9]: [channel#97, id#98, spark_grouping_id#99, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111] + +(50) Exchange +Input [9]: [channel#97, id#98, spark_grouping_id#99, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111] +Arguments: hashpartitioning(channel#97, id#98, spark_grouping_id#99, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(51) HashAggregate [codegen id : 5] +Input [9]: [channel#97, id#98, spark_grouping_id#99, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111] +Keys [3]: [channel#97, id#98, spark_grouping_id#99] +Functions [3]: [sum(sales#33), sum(returns#34), sum(profit#35)] +Aggregate Attributes [3]: [sum(sales#33)#112, sum(returns#34)#113, sum(profit#35)#114] +Results [5]: [channel#97, id#98, sum(sales#33)#112 AS sales#115, sum(returns#34)#113 AS returns#116, sum(profit#35)#114 AS profit#117] + +(52) TakeOrderedAndProject +Input [5]: [channel#97, id#98, sales#115, returns#116, profit#117] +Arguments: 100, [channel#97 ASC NULLS FIRST, id#98 ASC NULLS FIRST], [channel#97, id#98, sales#115, returns#116, profit#117] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3f9946093f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_datafusion/simplified.txt @@ -0,0 +1,62 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (5) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #2 + CometHashAggregate [s_store_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,s_store_id] + CometBroadcastHashJoin [store_sk,sales_price,profit,return_amt,net_loss,s_store_sk,s_store_id] + CometProject [store_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [store_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometProject [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + ReusedExchange [cp_catalog_page_id,sum,sum,sum,sum] #2 + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [web_site_id] #5 + CometHashAggregate [web_site_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,web_site_id] + CometBroadcastHashJoin [wsr_web_site_sk,sales_price,profit,return_amt,net_loss,web_site_sk,web_site_id] + CometProject [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk,ws_item_sk,ws_web_site_sk,ws_order_number] + CometBroadcastExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] #6 + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_order_number] + CometFilter [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [web_site_sk,web_site_id] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..68118f9903 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/explain.txt @@ -0,0 +1,409 @@ +== Physical Plan == +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Expand (66) + +- Union (65) + :- * HashAggregate (22) + : +- * ColumnarToRow (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometUnion (7) + : : : :- CometProject (3) + : : : : +- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometScan parquet spark_catalog.default.store_returns (4) + : : +- CometBroadcastExchange (11) + : : +- CometProject (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.date_dim (8) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometScan parquet spark_catalog.default.store (14) + :- * HashAggregate (41) + : +- * ColumnarToRow (40) + : +- CometExchange (39) + : +- CometHashAggregate (38) + : +- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometProject (32) + : : +- CometBroadcastHashJoin (31) + : : :- CometUnion (29) + : : : :- CometProject (25) + : : : : +- CometFilter (24) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (23) + : : : +- CometProject (28) + : : : +- CometFilter (27) + : : : +- CometScan parquet spark_catalog.default.catalog_returns (26) + : : +- ReusedExchange (30) + : +- CometBroadcastExchange (35) + : +- CometFilter (34) + : +- CometScan parquet spark_catalog.default.catalog_page (33) + +- * HashAggregate (64) + +- * ColumnarToRow (63) + +- CometExchange (62) + +- CometHashAggregate (61) + +- CometProject (60) + +- CometBroadcastHashJoin (59) + :- CometProject (55) + : +- CometBroadcastHashJoin (54) + : :- CometUnion (52) + : : :- CometProject (44) + : : : +- CometFilter (43) + : : : +- CometScan parquet spark_catalog.default.web_sales (42) + : : +- CometProject (51) + : : +- CometBroadcastHashJoin (50) + : : :- CometBroadcastExchange (46) + : : : +- CometScan parquet spark_catalog.default.web_returns (45) + : : +- CometProject (49) + : : +- CometFilter (48) + : : +- CometScan parquet spark_catalog.default.web_sales (47) + : +- ReusedExchange (53) + +- CometBroadcastExchange (58) + +- CometFilter (57) + +- CometScan parquet spark_catalog.default.web_site (56) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10], [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] + +(4) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(5) CometFilter +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(6) CometProject +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20], [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] + +(7) CometUnion +Child 0 Input [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Child 1 Input [6]: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-08-23)) AND (d_date#22 <= 2000-09-06)) AND isnotnull(d_date_sk#21)) + +(10) CometProject +Input [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(12) CometBroadcastHashJoin +Left output [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [1]: [d_date_sk#21] +Arguments: [date_sk#6], [d_date_sk#21], Inner, BuildRight + +(13) CometProject +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +Arguments: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10], [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] + +(14) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(16) CometBroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(17) CometBroadcastHashJoin +Left output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [store_sk#5], [s_store_sk#23], Inner, BuildRight + +(18) CometProject +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] +Arguments: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24], [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] + +(19) CometHashAggregate +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] + +(20) CometExchange +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] + +(22) HashAggregate [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#29, sum(UnscaledValue(return_amt#9))#30, sum(UnscaledValue(profit#8))#31, sum(UnscaledValue(net_loss#10))#32] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#29,17,2) AS sales#33, MakeDecimal(sum(UnscaledValue(return_amt#9))#30,17,2) AS returns#34, (MakeDecimal(sum(UnscaledValue(profit#8))#31,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#32,17,2)) AS profit#35, store channel AS channel#36, concat(store, s_store_id#24) AS id#37] + +(23) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_catalog_page_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#41)] +PushedFilters: [IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(24) CometFilter +Input [4]: [cs_catalog_page_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Condition : isnotnull(cs_catalog_page_sk#38) + +(25) CometProject +Input [4]: [cs_catalog_page_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Arguments: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47], [cs_catalog_page_sk#38 AS page_sk#42, cs_sold_date_sk#41 AS date_sk#43, cs_ext_sales_price#39 AS sales_price#44, cs_net_profit#40 AS profit#45, 0.00 AS return_amt#46, 0.00 AS net_loss#47] + +(26) CometScan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_catalog_page_sk#48, cr_return_amount#49, cr_net_loss#50, cr_returned_date_sk#51] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#51)] +PushedFilters: [IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(27) CometFilter +Input [4]: [cr_catalog_page_sk#48, cr_return_amount#49, cr_net_loss#50, cr_returned_date_sk#51] +Condition : isnotnull(cr_catalog_page_sk#48) + +(28) CometProject +Input [4]: [cr_catalog_page_sk#48, cr_return_amount#49, cr_net_loss#50, cr_returned_date_sk#51] +Arguments: [page_sk#52, date_sk#53, sales_price#54, profit#55, return_amt#56, net_loss#57], [cr_catalog_page_sk#48 AS page_sk#52, cr_returned_date_sk#51 AS date_sk#53, 0.00 AS sales_price#54, 0.00 AS profit#55, cr_return_amount#49 AS return_amt#56, cr_net_loss#50 AS net_loss#57] + +(29) CometUnion +Child 0 Input [6]: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47] +Child 1 Input [6]: [page_sk#52, date_sk#53, sales_price#54, profit#55, return_amt#56, net_loss#57] + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#58] + +(31) CometBroadcastHashJoin +Left output [6]: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47] +Right output [1]: [d_date_sk#58] +Arguments: [date_sk#43], [d_date_sk#58], Inner, BuildRight + +(32) CometProject +Input [7]: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47, d_date_sk#58] +Arguments: [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47], [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47] + +(33) CometScan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Condition : isnotnull(cp_catalog_page_sk#59) + +(35) CometBroadcastExchange +Input [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Arguments: [cp_catalog_page_sk#59, cp_catalog_page_id#60] + +(36) CometBroadcastHashJoin +Left output [5]: [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47] +Right output [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Arguments: [page_sk#42], [cp_catalog_page_sk#59], Inner, BuildRight + +(37) CometProject +Input [7]: [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_sk#59, cp_catalog_page_id#60] +Arguments: [sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_id#60], [sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_id#60] + +(38) CometHashAggregate +Input [5]: [sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_id#60] +Keys [1]: [cp_catalog_page_id#60] +Functions [4]: [partial_sum(UnscaledValue(sales_price#44)), partial_sum(UnscaledValue(return_amt#46)), partial_sum(UnscaledValue(profit#45)), partial_sum(UnscaledValue(net_loss#47))] + +(39) CometExchange +Input [5]: [cp_catalog_page_id#60, sum#61, sum#62, sum#63, sum#64] +Arguments: hashpartitioning(cp_catalog_page_id#60, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(40) ColumnarToRow [codegen id : 2] +Input [5]: [cp_catalog_page_id#60, sum#61, sum#62, sum#63, sum#64] + +(41) HashAggregate [codegen id : 2] +Input [5]: [cp_catalog_page_id#60, sum#61, sum#62, sum#63, sum#64] +Keys [1]: [cp_catalog_page_id#60] +Functions [4]: [sum(UnscaledValue(sales_price#44)), sum(UnscaledValue(return_amt#46)), sum(UnscaledValue(profit#45)), sum(UnscaledValue(net_loss#47))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#44))#65, sum(UnscaledValue(return_amt#46))#66, sum(UnscaledValue(profit#45))#67, sum(UnscaledValue(net_loss#47))#68] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#44))#65,17,2) AS sales#69, MakeDecimal(sum(UnscaledValue(return_amt#46))#66,17,2) AS returns#70, (MakeDecimal(sum(UnscaledValue(profit#45))#67,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#47))#68,17,2)) AS profit#71, catalog channel AS channel#72, concat(catalog_page, cp_catalog_page_id#60) AS id#73] + +(42) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_site_sk#74, ws_ext_sales_price#75, ws_net_profit#76, ws_sold_date_sk#77] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#77)] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(43) CometFilter +Input [4]: [ws_web_site_sk#74, ws_ext_sales_price#75, ws_net_profit#76, ws_sold_date_sk#77] +Condition : isnotnull(ws_web_site_sk#74) + +(44) CometProject +Input [4]: [ws_web_site_sk#74, ws_ext_sales_price#75, ws_net_profit#76, ws_sold_date_sk#77] +Arguments: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83], [ws_web_site_sk#74 AS wsr_web_site_sk#78, ws_sold_date_sk#77 AS date_sk#79, ws_ext_sales_price#75 AS sales_price#80, ws_net_profit#76 AS profit#81, 0.00 AS return_amt#82, 0.00 AS net_loss#83] + +(45) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#88)] +ReadSchema: struct + +(46) CometBroadcastExchange +Input [5]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Arguments: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] + +(47) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91, ws_sold_date_sk#92] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) CometFilter +Input [4]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91, ws_sold_date_sk#92] +Condition : ((isnotnull(ws_item_sk#89) AND isnotnull(ws_order_number#91)) AND isnotnull(ws_web_site_sk#90)) + +(49) CometProject +Input [4]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91, ws_sold_date_sk#92] +Arguments: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91], [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91] + +(50) CometBroadcastHashJoin +Left output [5]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Right output [3]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91] +Arguments: [wr_item_sk#84, wr_order_number#85], [ws_item_sk#89, ws_order_number#91], Inner, BuildLeft + +(51) CometProject +Input [8]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88, ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91] +Arguments: [wsr_web_site_sk#93, date_sk#94, sales_price#95, profit#96, return_amt#97, net_loss#98], [ws_web_site_sk#90 AS wsr_web_site_sk#93, wr_returned_date_sk#88 AS date_sk#94, 0.00 AS sales_price#95, 0.00 AS profit#96, wr_return_amt#86 AS return_amt#97, wr_net_loss#87 AS net_loss#98] + +(52) CometUnion +Child 0 Input [6]: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83] +Child 1 Input [6]: [wsr_web_site_sk#93, date_sk#94, sales_price#95, profit#96, return_amt#97, net_loss#98] + +(53) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#99] + +(54) CometBroadcastHashJoin +Left output [6]: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83] +Right output [1]: [d_date_sk#99] +Arguments: [date_sk#79], [d_date_sk#99], Inner, BuildRight + +(55) CometProject +Input [7]: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83, d_date_sk#99] +Arguments: [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83], [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83] + +(56) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#100, web_site_id#101] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(57) CometFilter +Input [2]: [web_site_sk#100, web_site_id#101] +Condition : isnotnull(web_site_sk#100) + +(58) CometBroadcastExchange +Input [2]: [web_site_sk#100, web_site_id#101] +Arguments: [web_site_sk#100, web_site_id#101] + +(59) CometBroadcastHashJoin +Left output [5]: [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83] +Right output [2]: [web_site_sk#100, web_site_id#101] +Arguments: [wsr_web_site_sk#78], [web_site_sk#100], Inner, BuildRight + +(60) CometProject +Input [7]: [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_sk#100, web_site_id#101] +Arguments: [sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_id#101], [sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_id#101] + +(61) CometHashAggregate +Input [5]: [sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_id#101] +Keys [1]: [web_site_id#101] +Functions [4]: [partial_sum(UnscaledValue(sales_price#80)), partial_sum(UnscaledValue(return_amt#82)), partial_sum(UnscaledValue(profit#81)), partial_sum(UnscaledValue(net_loss#83))] + +(62) CometExchange +Input [5]: [web_site_id#101, sum#102, sum#103, sum#104, sum#105] +Arguments: hashpartitioning(web_site_id#101, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(63) ColumnarToRow [codegen id : 3] +Input [5]: [web_site_id#101, sum#102, sum#103, sum#104, sum#105] + +(64) HashAggregate [codegen id : 3] +Input [5]: [web_site_id#101, sum#102, sum#103, sum#104, sum#105] +Keys [1]: [web_site_id#101] +Functions [4]: [sum(UnscaledValue(sales_price#80)), sum(UnscaledValue(return_amt#82)), sum(UnscaledValue(profit#81)), sum(UnscaledValue(net_loss#83))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#80))#106, sum(UnscaledValue(return_amt#82))#107, sum(UnscaledValue(profit#81))#108, sum(UnscaledValue(net_loss#83))#109] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#80))#106,17,2) AS sales#110, MakeDecimal(sum(UnscaledValue(return_amt#82))#107,17,2) AS returns#111, (MakeDecimal(sum(UnscaledValue(profit#81))#108,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#83))#109,17,2)) AS profit#112, web channel AS channel#113, concat(web_site, web_site_id#101) AS id#114] + +(65) Union + +(66) Expand [codegen id : 4] +Input [5]: [sales#33, returns#34, profit#35, channel#36, id#37] +Arguments: [[sales#33, returns#34, profit#35, channel#36, id#37, 0], [sales#33, returns#34, profit#35, channel#36, null, 1], [sales#33, returns#34, profit#35, null, null, 3]], [sales#33, returns#34, profit#35, channel#115, id#116, spark_grouping_id#117] + +(67) HashAggregate [codegen id : 4] +Input [6]: [sales#33, returns#34, profit#35, channel#115, id#116, spark_grouping_id#117] +Keys [3]: [channel#115, id#116, spark_grouping_id#117] +Functions [3]: [partial_sum(sales#33), partial_sum(returns#34), partial_sum(profit#35)] +Aggregate Attributes [6]: [sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Results [9]: [channel#115, id#116, spark_grouping_id#117, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] + +(68) Exchange +Input [9]: [channel#115, id#116, spark_grouping_id#117, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Arguments: hashpartitioning(channel#115, id#116, spark_grouping_id#117, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(69) HashAggregate [codegen id : 5] +Input [9]: [channel#115, id#116, spark_grouping_id#117, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Keys [3]: [channel#115, id#116, spark_grouping_id#117] +Functions [3]: [sum(sales#33), sum(returns#34), sum(profit#35)] +Aggregate Attributes [3]: [sum(sales#33)#130, sum(returns#34)#131, sum(profit#35)#132] +Results [5]: [channel#115, id#116, sum(sales#33)#130 AS sales#133, sum(returns#34)#131 AS returns#134, sum(profit#35)#132 AS profit#135] + +(70) TakeOrderedAndProject +Input [5]: [channel#115, id#116, sales#133, returns#134, profit#135] +Arguments: 100, [channel#115 ASC NULLS FIRST, id#116 ASC NULLS FIRST], [channel#115, id#116, sales#133, returns#134, profit#135] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..be9f9ace21 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q5.native_iceberg_compat/simplified.txt @@ -0,0 +1,80 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (5) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #2 + CometHashAggregate [s_store_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,s_store_id] + CometBroadcastHashJoin [store_sk,sales_price,profit,return_amt,net_loss,s_store_sk,s_store_id] + CometProject [store_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [store_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometProject [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [cp_catalog_page_id] #5 + CometHashAggregate [cp_catalog_page_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [page_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [page_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [page_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [page_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] [page_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometProject [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] [page_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [cp_catalog_page_sk,cp_catalog_page_id] #6 + CometFilter [cp_catalog_page_sk,cp_catalog_page_id] + CometScan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [web_site_id] #7 + CometHashAggregate [web_site_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,web_site_id] + CometBroadcastHashJoin [wsr_web_site_sk,sales_price,profit,return_amt,net_loss,web_site_sk,web_site_id] + CometProject [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk,ws_item_sk,ws_web_site_sk,ws_order_number] + CometBroadcastExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] #8 + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_order_number] + CometFilter [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [web_site_sk,web_site_id] #9 + CometFilter [web_site_sk,web_site_id] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/explain.txt new file mode 100644 index 0000000000..49e3721e6c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/explain.txt @@ -0,0 +1,148 @@ +== Physical Plan == +* ColumnarToRow (28) ++- CometTakeOrderedAndProject (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (8) + : +- CometBroadcastExchange (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- CometBroadcastExchange (21) + +- CometProject (20) + +- CometFilter (19) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (18) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(4) CometFilter +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : ((isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#6)) AND isnotnull(sr_customer_sk#7)) + +(5) CometBroadcastExchange +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(6) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [ss_ticket_number#4, ss_item_sk#1, ss_customer_sk#2], [sr_ticket_number#8, sr_item_sk#6, sr_customer_sk#7], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9], [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`store` +Output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(9) CometFilter +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Condition : isnotnull(s_store_sk#10) + +(10) CometBroadcastExchange +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(11) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] +Right output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(12) CometProject +Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(14) CometFilter +Input [1]: [d_date_sk#21] +Condition : isnotnull(d_date_sk#21) + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(16) CometBroadcastHashJoin +Left output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Right output [1]: [d_date_sk#21] +Arguments: [ss_sold_date_sk#5], [d_date_sk#21], Inner, BuildRight + +(17) CometProject +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#21] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(18) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Arguments: [d_date_sk#22, d_year#23, d_moy#24] + +(19) CometFilter +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : ((((isnotnull(d_year#23) AND isnotnull(d_moy#24)) AND (d_year#23 = 2001)) AND (d_moy#24 = 8)) AND isnotnull(d_date_sk#22)) + +(20) CometProject +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Arguments: [d_date_sk#22], [d_date_sk#22] + +(21) CometBroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: [d_date_sk#22] + +(22) CometBroadcastHashJoin +Left output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Right output [1]: [d_date_sk#22] +Arguments: [sr_returned_date_sk#9], [d_date_sk#22], Inner, BuildRight + +(23) CometProject +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#22] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(24) CometHashAggregate +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(25) CometExchange +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#25, sum#26, sum#27, sum#28, sum#29] +Arguments: hashpartitioning(s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(26) CometHashAggregate +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#25, sum#26, sum#27, sum#28, sum#29] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(27) CometTakeOrderedAndProject +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#11 ASC NULLS FIRST,s_company_id#12 ASC NULLS FIRST,s_street_number#13 ASC NULLS FIRST,s_street_name#14 ASC NULLS FIRST,s_street_type#15 ASC NULLS FIRST,s_suite_number#16 ASC NULLS FIRST,s_city#17 ASC NULLS FIRST,s_county#18 ASC NULLS FIRST,s_state#19 ASC NULLS FIRST,s_zip#20 ASC NULLS FIRST], output=[s_store_name#11,s_company_id#12,s_street_number#13,s_street_name#14,s_street_type#15,s_suite_number#16,s_city#17,s_county#18,s_state#19,s_zip#20,30 days #30,31 - 60 days #31,61 - 90 days #32,91 - 120 days #33,>120 days #34]), [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34], 100, [s_store_name#11 ASC NULLS FIRST, s_company_id#12 ASC NULLS FIRST, s_street_number#13 ASC NULLS FIRST, s_street_name#14 ASC NULLS FIRST, s_street_type#15 ASC NULLS FIRST, s_suite_number#16 ASC NULLS FIRST, s_city#17 ASC NULLS FIRST, s_county#18 ASC NULLS FIRST, s_state#19 ASC NULLS FIRST, s_zip#20 ASC NULLS FIRST], [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34] + +(28) ColumnarToRow [codegen id : 1] +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b6e1187f76 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_datafusion/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + CometHashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 30) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 60) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 90) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) > 120) THEN 1 ELSE 0 END)] + CometExchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 + CometHashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum,sr_returned_date_sk,ss_sold_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,d_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,d_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk,s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometProject [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #3 + CometFilter [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastExchange [d_date_sk] #4 + CometFilter [d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0d4624d71e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/explain.txt @@ -0,0 +1,165 @@ +== Physical Plan == +* ColumnarToRow (28) ++- CometTakeOrderedAndProject (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.store_returns (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.store (8) + : +- CometBroadcastExchange (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.date_dim (13) + +- CometBroadcastExchange (21) + +- CometProject (20) + +- CometFilter (19) + +- CometScan parquet spark_catalog.default.date_dim (18) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_store_sk#3)) + +(3) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#9)] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : ((isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#6)) AND isnotnull(sr_customer_sk#7)) + +(5) CometBroadcastExchange +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(6) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [ss_ticket_number#4, ss_item_sk#1, ss_customer_sk#2], [sr_ticket_number#8, sr_item_sk#6, sr_customer_sk#7], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9], [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] + +(8) CometScan parquet spark_catalog.default.store +Output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(9) CometFilter +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Condition : isnotnull(s_store_sk#10) + +(10) CometBroadcastExchange +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(11) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] +Right output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(12) CometProject +Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [1]: [d_date_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [1]: [d_date_sk#21] +Condition : isnotnull(d_date_sk#21) + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(16) CometBroadcastHashJoin +Left output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Right output [1]: [d_date_sk#21] +Arguments: [ss_sold_date_sk#5], [d_date_sk#21], Inner, BuildRight + +(17) CometProject +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#21] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(18) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) CometFilter +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : ((((isnotnull(d_year#23) AND isnotnull(d_moy#24)) AND (d_year#23 = 2001)) AND (d_moy#24 = 8)) AND isnotnull(d_date_sk#22)) + +(20) CometProject +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Arguments: [d_date_sk#22], [d_date_sk#22] + +(21) CometBroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: [d_date_sk#22] + +(22) CometBroadcastHashJoin +Left output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Right output [1]: [d_date_sk#22] +Arguments: [sr_returned_date_sk#9], [d_date_sk#22], Inner, BuildRight + +(23) CometProject +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#22] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(24) CometHashAggregate +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(25) CometExchange +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#25, sum#26, sum#27, sum#28, sum#29] +Arguments: hashpartitioning(s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(26) CometHashAggregate +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#25, sum#26, sum#27, sum#28, sum#29] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(27) CometTakeOrderedAndProject +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#11 ASC NULLS FIRST,s_company_id#12 ASC NULLS FIRST,s_street_number#13 ASC NULLS FIRST,s_street_name#14 ASC NULLS FIRST,s_street_type#15 ASC NULLS FIRST,s_suite_number#16 ASC NULLS FIRST,s_city#17 ASC NULLS FIRST,s_county#18 ASC NULLS FIRST,s_state#19 ASC NULLS FIRST,s_zip#20 ASC NULLS FIRST], output=[s_store_name#11,s_company_id#12,s_street_number#13,s_street_name#14,s_street_type#15,s_suite_number#16,s_city#17,s_county#18,s_state#19,s_zip#20,30 days #30,31 - 60 days #31,61 - 90 days #32,91 - 120 days #33,>120 days #34]), [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34], 100, [s_store_name#11 ASC NULLS FIRST, s_company_id#12 ASC NULLS FIRST, s_street_number#13 ASC NULLS FIRST, s_street_name#14 ASC NULLS FIRST, s_street_type#15 ASC NULLS FIRST, s_suite_number#16 ASC NULLS FIRST, s_city#17 ASC NULLS FIRST, s_county#18 ASC NULLS FIRST, s_state#19 ASC NULLS FIRST, s_zip#20 ASC NULLS FIRST], [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34] + +(28) ColumnarToRow [codegen id : 1] +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #30, 31 - 60 days #31, 61 - 90 days #32, 91 - 120 days #33, >120 days #34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..06f7e28e2d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q50.native_iceberg_compat/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + CometHashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 30) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 60) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 90) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) > 120) THEN 1 ELSE 0 END)] + CometExchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 + CometHashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum,sr_returned_date_sk,ss_sold_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,d_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,d_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk,s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometProject [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #3 + CometFilter [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastExchange [d_date_sk] #4 + CometFilter [d_date_sk] + CometScan parquet spark_catalog.default.date_dim [d_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/explain.txt new file mode 100644 index 0000000000..6e0895aa3e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/explain.txt @@ -0,0 +1,141 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Filter (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * Project (22) + +- * SortMergeJoin FullOuter (21) + :- * Sort (18) + : +- Exchange (17) + : +- * Project (16) + : +- Window (15) + : +- * ColumnarToRow (14) + : +- CometSort (13) + : +- CometExchange (12) + : +- CometHashAggregate (11) + : +- CometExchange (10) + : +- CometHashAggregate (9) + : +- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + +- * Sort (20) + +- ReusedExchange (19) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5, d_month_seq#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5], [d_date_sk#4, d_date#5] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: [d_date_sk#4, d_date#5] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Right output [2]: [d_date_sk#4, d_date#5] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] +Arguments: [ws_item_sk#1, ws_sales_price#2, d_date#5], [ws_item_sk#1, ws_sales_price#2, d_date#5] + +(9) CometHashAggregate +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] + +(10) CometExchange +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(11) CometHashAggregate +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] + +(12) CometExchange +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(13) CometSort +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] +Arguments: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1], [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST] + +(14) ColumnarToRow [codegen id : 1] +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] + +(15) Window +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] +Arguments: [sum(_w0#9) windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#10], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(16) Project [codegen id : 2] +Output [3]: [item_sk#8, d_date#5, cume_sales#10] +Input [5]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1, cume_sales#10] + +(17) Exchange +Input [3]: [item_sk#8, d_date#5, cume_sales#10] +Arguments: hashpartitioning(item_sk#8, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 3] +Input [3]: [item_sk#8, d_date#5, cume_sales#10] +Arguments: [item_sk#8 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(19) ReusedExchange [Reuses operator id: 17] +Output [3]: [item_sk#11, d_date#12, cume_sales#13] + +(20) Sort [codegen id : 6] +Input [3]: [item_sk#11, d_date#12, cume_sales#13] +Arguments: [item_sk#11 ASC NULLS FIRST, d_date#12 ASC NULLS FIRST], false, 0 + +(21) SortMergeJoin [codegen id : 7] +Left keys [2]: [item_sk#8, d_date#5] +Right keys [2]: [item_sk#11, d_date#12] +Join type: FullOuter +Join condition: None + +(22) Project [codegen id : 7] +Output [4]: [CASE WHEN isnotnull(item_sk#8) THEN item_sk#8 ELSE item_sk#11 END AS item_sk#14, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#12 END AS d_date#15, cume_sales#10 AS web_sales#16, cume_sales#13 AS store_sales#17] +Input [6]: [item_sk#8, d_date#5, cume_sales#10, item_sk#11, d_date#12, cume_sales#13] + +(23) Exchange +Input [4]: [item_sk#14, d_date#15, web_sales#16, store_sales#17] +Arguments: hashpartitioning(item_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 8] +Input [4]: [item_sk#14, d_date#15, web_sales#16, store_sales#17] +Arguments: [item_sk#14 ASC NULLS FIRST, d_date#15 ASC NULLS FIRST], false, 0 + +(25) Window +Input [4]: [item_sk#14, d_date#15, web_sales#16, store_sales#17] +Arguments: [max(web_sales#16) windowspecdefinition(item_sk#14, d_date#15 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#18, max(store_sales#17) windowspecdefinition(item_sk#14, d_date#15 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#19], [item_sk#14], [d_date#15 ASC NULLS FIRST] + +(26) Filter [codegen id : 9] +Input [6]: [item_sk#14, d_date#15, web_sales#16, store_sales#17, web_cumulative#18, store_cumulative#19] +Condition : ((isnotnull(web_cumulative#18) AND isnotnull(store_cumulative#19)) AND (web_cumulative#18 > store_cumulative#19)) + +(27) TakeOrderedAndProject +Input [6]: [item_sk#14, d_date#15, web_sales#16, store_sales#17, web_cumulative#18, store_cumulative#19] +Arguments: 100, [item_sk#14 ASC NULLS FIRST, d_date#15 ASC NULLS FIRST], [item_sk#14, d_date#15, web_sales#16, store_sales#17, web_cumulative#18, store_cumulative#19] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9e95125e39 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (9) + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen (8) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (7) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (3) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (2) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,_w0,ws_item_sk] + CometExchange [ws_item_sk] #3 + CometHashAggregate [item_sk,d_date,_w0,ws_item_sk,sum,sum(UnscaledValue(ws_sales_price))] + CometExchange [ws_item_sk,d_date] #4 + CometHashAggregate [ws_item_sk,d_date,sum,ws_sales_price] + CometProject [ws_item_sk,ws_sales_price,d_date] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,cume_sales] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6d4c637043 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/explain.txt @@ -0,0 +1,219 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * Filter (39) + +- Window (38) + +- * Sort (37) + +- Exchange (36) + +- * Project (35) + +- * SortMergeJoin FullOuter (34) + :- * Sort (18) + : +- Exchange (17) + : +- * Project (16) + : +- Window (15) + : +- * ColumnarToRow (14) + : +- CometSort (13) + : +- CometExchange (12) + : +- CometHashAggregate (11) + : +- CometExchange (10) + : +- CometHashAggregate (9) + : +- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.web_sales (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.date_dim (3) + +- * Sort (33) + +- Exchange (32) + +- * Project (31) + +- Window (30) + +- * ColumnarToRow (29) + +- CometSort (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometFilter (20) + : +- CometScan parquet spark_catalog.default.store_sales (19) + +- ReusedExchange (21) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5], [d_date_sk#4, d_date#5] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: [d_date_sk#4, d_date#5] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Right output [2]: [d_date_sk#4, d_date#5] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] +Arguments: [ws_item_sk#1, ws_sales_price#2, d_date#5], [ws_item_sk#1, ws_sales_price#2, d_date#5] + +(9) CometHashAggregate +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] + +(10) CometExchange +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(11) CometHashAggregate +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] + +(12) CometExchange +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(13) CometSort +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] +Arguments: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1], [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST] + +(14) ColumnarToRow [codegen id : 1] +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] + +(15) Window +Input [4]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1] +Arguments: [sum(_w0#9) windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#10], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(16) Project [codegen id : 2] +Output [3]: [item_sk#8, d_date#5, cume_sales#10] +Input [5]: [item_sk#8, d_date#5, _w0#9, ws_item_sk#1, cume_sales#10] + +(17) Exchange +Input [3]: [item_sk#8, d_date#5, cume_sales#10] +Arguments: hashpartitioning(item_sk#8, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 3] +Input [3]: [item_sk#8, d_date#5, cume_sales#10] +Arguments: [item_sk#8 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(19) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [ss_item_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : isnotnull(ss_item_sk#11) + +(21) ReusedExchange [Reuses operator id: 6] +Output [2]: [d_date_sk#14, d_date#15] + +(22) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_date#15] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(23) CometProject +Input [5]: [ss_item_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_date#15] +Arguments: [ss_item_sk#11, ss_sales_price#12, d_date#15], [ss_item_sk#11, ss_sales_price#12, d_date#15] + +(24) CometHashAggregate +Input [3]: [ss_item_sk#11, ss_sales_price#12, d_date#15] +Keys [2]: [ss_item_sk#11, d_date#15] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] + +(25) CometExchange +Input [3]: [ss_item_sk#11, d_date#15, sum#16] +Arguments: hashpartitioning(ss_item_sk#11, d_date#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(26) CometHashAggregate +Input [3]: [ss_item_sk#11, d_date#15, sum#16] +Keys [2]: [ss_item_sk#11, d_date#15] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] + +(27) CometExchange +Input [4]: [item_sk#17, d_date#15, _w0#18, ss_item_sk#11] +Arguments: hashpartitioning(ss_item_sk#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(28) CometSort +Input [4]: [item_sk#17, d_date#15, _w0#18, ss_item_sk#11] +Arguments: [item_sk#17, d_date#15, _w0#18, ss_item_sk#11], [ss_item_sk#11 ASC NULLS FIRST, d_date#15 ASC NULLS FIRST] + +(29) ColumnarToRow [codegen id : 4] +Input [4]: [item_sk#17, d_date#15, _w0#18, ss_item_sk#11] + +(30) Window +Input [4]: [item_sk#17, d_date#15, _w0#18, ss_item_sk#11] +Arguments: [sum(_w0#18) windowspecdefinition(ss_item_sk#11, d_date#15 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#19], [ss_item_sk#11], [d_date#15 ASC NULLS FIRST] + +(31) Project [codegen id : 5] +Output [3]: [item_sk#17, d_date#15, cume_sales#19] +Input [5]: [item_sk#17, d_date#15, _w0#18, ss_item_sk#11, cume_sales#19] + +(32) Exchange +Input [3]: [item_sk#17, d_date#15, cume_sales#19] +Arguments: hashpartitioning(item_sk#17, d_date#15, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(33) Sort [codegen id : 6] +Input [3]: [item_sk#17, d_date#15, cume_sales#19] +Arguments: [item_sk#17 ASC NULLS FIRST, d_date#15 ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin [codegen id : 7] +Left keys [2]: [item_sk#8, d_date#5] +Right keys [2]: [item_sk#17, d_date#15] +Join type: FullOuter +Join condition: None + +(35) Project [codegen id : 7] +Output [4]: [CASE WHEN isnotnull(item_sk#8) THEN item_sk#8 ELSE item_sk#17 END AS item_sk#20, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#15 END AS d_date#21, cume_sales#10 AS web_sales#22, cume_sales#19 AS store_sales#23] +Input [6]: [item_sk#8, d_date#5, cume_sales#10, item_sk#17, d_date#15, cume_sales#19] + +(36) Exchange +Input [4]: [item_sk#20, d_date#21, web_sales#22, store_sales#23] +Arguments: hashpartitioning(item_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(37) Sort [codegen id : 8] +Input [4]: [item_sk#20, d_date#21, web_sales#22, store_sales#23] +Arguments: [item_sk#20 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(38) Window +Input [4]: [item_sk#20, d_date#21, web_sales#22, store_sales#23] +Arguments: [max(web_sales#22) windowspecdefinition(item_sk#20, d_date#21 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#24, max(store_sales#23) windowspecdefinition(item_sk#20, d_date#21 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#25], [item_sk#20], [d_date#21 ASC NULLS FIRST] + +(39) Filter [codegen id : 9] +Input [6]: [item_sk#20, d_date#21, web_sales#22, store_sales#23, web_cumulative#24, store_cumulative#25] +Condition : ((isnotnull(web_cumulative#24) AND isnotnull(store_cumulative#25)) AND (web_cumulative#24 > store_cumulative#25)) + +(40) TakeOrderedAndProject +Input [6]: [item_sk#20, d_date#21, web_sales#22, store_sales#23, web_cumulative#24, store_cumulative#25] +Arguments: 100, [item_sk#20 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], [item_sk#20, d_date#21, web_sales#22, store_sales#23, web_cumulative#24, store_cumulative#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..19eca39217 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q51.native_iceberg_compat/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (9) + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen (8) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (7) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (3) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (2) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,_w0,ws_item_sk] + CometExchange [ws_item_sk] #3 + CometHashAggregate [item_sk,d_date,_w0,ws_item_sk,sum,sum(UnscaledValue(ws_sales_price))] + CometExchange [ws_item_sk,d_date] #4 + CometHashAggregate [ws_item_sk,d_date,sum,ws_sales_price] + CometProject [ws_item_sk,ws_sales_price,d_date] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #6 + WholeStageCodegen (5) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ss_item_sk,d_date] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,_w0,ss_item_sk] + CometExchange [ss_item_sk] #7 + CometHashAggregate [item_sk,d_date,_w0,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [ss_item_sk,d_date] #8 + CometHashAggregate [ss_item_sk,d_date,sum,ss_sales_price] + CometProject [ss_item_sk,ss_sales_price,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/explain.txt new file mode 100644 index 0000000000..b9de030234 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) CometHashAggregate +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [4]: [d_year#2, brand_id#12, brand#13, ext_price#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[d_year#2 ASC NULLS FIRST,ext_price#14 DESC NULLS LAST,brand_id#12 ASC NULLS FIRST], output=[d_year#2,brand_id#12,brand#13,ext_price#14]), [d_year#2, brand_id#12, brand#13, ext_price#14], 100, [d_year#2 ASC NULLS FIRST, ext_price#14 DESC NULLS LAST, brand_id#12 ASC NULLS FIRST], [d_year#2, brand_id#12, brand#13, ext_price#14] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, brand_id#12, brand#13, ext_price#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/simplified.txt new file mode 100644 index 0000000000..24f033cc73 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [d_year,brand_id,brand,ext_price] + CometHashAggregate [d_year,brand_id,brand,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [d_year,i_brand,i_brand_id] #1 + CometHashAggregate [d_year,i_brand,i_brand_id,sum,ss_ext_sales_price] + CometProject [d_year,ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2c5bab474f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/explain.txt @@ -0,0 +1,111 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.date_dim (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometScan parquet spark_catalog.default.store_sales (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometScan parquet spark_catalog.default.item (9) + + +(1) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) CometHashAggregate +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#11] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [4]: [d_year#2, brand_id#12, brand#13, ext_price#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[d_year#2 ASC NULLS FIRST,ext_price#14 DESC NULLS LAST,brand_id#12 ASC NULLS FIRST], output=[d_year#2,brand_id#12,brand#13,ext_price#14]), [d_year#2, brand_id#12, brand#13, ext_price#14], 100, [d_year#2 ASC NULLS FIRST, ext_price#14 DESC NULLS LAST, brand_id#12 ASC NULLS FIRST], [d_year#2, brand_id#12, brand#13, ext_price#14] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, brand_id#12, brand#13, ext_price#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..af5223b69e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q52.native_iceberg_compat/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [d_year,brand_id,brand,ext_price] + CometHashAggregate [d_year,brand_id,brand,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [d_year,i_brand,i_brand_id] #1 + CometHashAggregate [d_year,i_brand,i_brand_id,sum,ss_ext_sales_price] + CometProject [d_year,ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/explain.txt new file mode 100644 index 0000000000..827fbeb9d3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- * Filter (27) + +- Window (26) + +- * ColumnarToRow (25) + +- CometSort (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (14) + : +- CometBroadcastHashJoin (13) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : +- CometBroadcastExchange (12) + : +- CometProject (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + +- CometBroadcastExchange (17) + +- CometFilter (16) + +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,reference ,self-help )) AND i_brand#2 IN (scholaramalgamalg #6 ,scholaramalgamalg #7 ,exportiunivamalg #8 ,scholaramalgamalg #8 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_manufact_id#5], [i_item_sk#1, i_manufact_id#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(5) CometFilter +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(6) CometBroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) CometBroadcastHashJoin +Left output [2]: [i_item_sk#1, i_manufact_id#5] +Right output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_item_sk#1], [ss_item_sk#10], Inner, BuildRight + +(8) CometProject +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13], [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Arguments: [d_date_sk#14, d_month_seq#15, d_qoy#16] + +(10) CometFilter +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(11) CometProject +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Arguments: [d_date_sk#14, d_qoy#16], [d_date_sk#14, d_qoy#16] + +(12) CometBroadcastExchange +Input [2]: [d_date_sk#14, d_qoy#16] +Arguments: [d_date_sk#14, d_qoy#16] + +(13) CometBroadcastHashJoin +Left output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_qoy#16] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(14) CometProject +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_qoy#16] +Arguments: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16], [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(16) CometFilter +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(18) CometBroadcastHashJoin +Left output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Right output [1]: [s_store_sk#17] +Arguments: [ss_store_sk#11], [s_store_sk#17], Inner, BuildRight + +(19) CometProject +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] +Arguments: [i_manufact_id#5, ss_sales_price#12, d_qoy#16], [i_manufact_id#5, ss_sales_price#12, d_qoy#16] + +(20) CometHashAggregate +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] + +(21) CometExchange +Input [3]: [i_manufact_id#5, d_qoy#16, sum#18] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [3]: [i_manufact_id#5, d_qoy#16, sum#18] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] + +(23) CometExchange +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] +Arguments: [i_manufact_id#5, sum_sales#19, _w0#20], [i_manufact_id#5 ASC NULLS FIRST] + +(25) ColumnarToRow [codegen id : 1] +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] + +(26) Window +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] +Arguments: [avg(_w0#20) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#21], [i_manufact_id#5] + +(27) Filter [codegen id : 2] +Input [4]: [i_manufact_id#5, sum_sales#19, _w0#20, avg_quarterly_sales#21] +Condition : CASE WHEN (avg_quarterly_sales#21 > 0.000000) THEN ((abs((sum_sales#19 - avg_quarterly_sales#21)) / avg_quarterly_sales#21) > 0.1000000000000000) ELSE false END + +(28) Project [codegen id : 2] +Output [3]: [i_manufact_id#5, sum_sales#19, avg_quarterly_sales#21] +Input [4]: [i_manufact_id#5, sum_sales#19, _w0#20, avg_quarterly_sales#21] + +(29) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#19, avg_quarterly_sales#21] +Arguments: 100, [avg_quarterly_sales#21 ASC NULLS FIRST, sum_sales#19 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#19, avg_quarterly_sales#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c4574bc31b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_datafusion/simplified.txt @@ -0,0 +1,33 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen (2) + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_manufact_id,sum_sales,_w0] + CometExchange [i_manufact_id] #1 + CometHashAggregate [i_manufact_id,sum_sales,_w0,d_qoy,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_manufact_id,d_qoy] #2 + CometHashAggregate [i_manufact_id,d_qoy,sum,ss_sales_price] + CometProject [i_manufact_id,ss_sales_price,d_qoy] + CometBroadcastHashJoin [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy,s_store_sk] + CometProject [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + CometBroadcastHashJoin [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_qoy] + CometProject [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_manufact_id,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [i_item_sk,i_manufact_id] + CometFilter [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_qoy] #4 + CometProject [d_date_sk,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_qoy] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3f45e99216 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/explain.txt @@ -0,0 +1,165 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- * Filter (27) + +- Window (26) + +- * ColumnarToRow (25) + +- CometSort (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (14) + : +- CometBroadcastHashJoin (13) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.item (1) + : : +- CometBroadcastExchange (6) + : : +- CometFilter (5) + : : +- CometScan parquet spark_catalog.default.store_sales (4) + : +- CometBroadcastExchange (12) + : +- CometProject (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.date_dim (9) + +- CometBroadcastExchange (17) + +- CometFilter (16) + +- CometScan parquet spark_catalog.default.store (15) + + +(1) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books ,Children ,Electronics ]),In(i_class, [personal ,portable ,reference ,self-help ])),In(i_brand, [exportiunivamalg #6 ,scholaramalgamalg #7 ,scholaramalgamalg #8 ,scholaramalgamalg #6 ])),And(And(In(i_category, [Men ,Music ,Women ]),In(i_class, [accessories ,classical ,fragrances ,pants ])),In(i_brand, [amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,reference ,self-help )) AND i_brand#2 IN (scholaramalgamalg #7 ,scholaramalgamalg #8 ,exportiunivamalg #6 ,scholaramalgamalg #6 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_manufact_id#5], [i_item_sk#1, i_manufact_id#5] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) CometFilter +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(6) CometBroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) CometBroadcastHashJoin +Left output [2]: [i_item_sk#1, i_manufact_id#5] +Right output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_item_sk#1], [ss_item_sk#10], Inner, BuildRight + +(8) CometProject +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13], [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(11) CometProject +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Arguments: [d_date_sk#14, d_qoy#16], [d_date_sk#14, d_qoy#16] + +(12) CometBroadcastExchange +Input [2]: [d_date_sk#14, d_qoy#16] +Arguments: [d_date_sk#14, d_qoy#16] + +(13) CometBroadcastHashJoin +Left output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_qoy#16] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(14) CometProject +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_qoy#16] +Arguments: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16], [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] + +(15) CometScan parquet spark_catalog.default.store +Output [1]: [s_store_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(16) CometFilter +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(18) CometBroadcastHashJoin +Left output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Right output [1]: [s_store_sk#17] +Arguments: [ss_store_sk#11], [s_store_sk#17], Inner, BuildRight + +(19) CometProject +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] +Arguments: [i_manufact_id#5, ss_sales_price#12, d_qoy#16], [i_manufact_id#5, ss_sales_price#12, d_qoy#16] + +(20) CometHashAggregate +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] + +(21) CometExchange +Input [3]: [i_manufact_id#5, d_qoy#16, sum#18] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [3]: [i_manufact_id#5, d_qoy#16, sum#18] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] + +(23) CometExchange +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] +Arguments: [i_manufact_id#5, sum_sales#19, _w0#20], [i_manufact_id#5 ASC NULLS FIRST] + +(25) ColumnarToRow [codegen id : 1] +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] + +(26) Window +Input [3]: [i_manufact_id#5, sum_sales#19, _w0#20] +Arguments: [avg(_w0#20) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#21], [i_manufact_id#5] + +(27) Filter [codegen id : 2] +Input [4]: [i_manufact_id#5, sum_sales#19, _w0#20, avg_quarterly_sales#21] +Condition : CASE WHEN (avg_quarterly_sales#21 > 0.000000) THEN ((abs((sum_sales#19 - avg_quarterly_sales#21)) / avg_quarterly_sales#21) > 0.1000000000000000) ELSE false END + +(28) Project [codegen id : 2] +Output [3]: [i_manufact_id#5, sum_sales#19, avg_quarterly_sales#21] +Input [4]: [i_manufact_id#5, sum_sales#19, _w0#20, avg_quarterly_sales#21] + +(29) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#19, avg_quarterly_sales#21] +Arguments: 100, [avg_quarterly_sales#21 ASC NULLS FIRST, sum_sales#19 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#19, avg_quarterly_sales#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..57b34132a4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q53.native_iceberg_compat/simplified.txt @@ -0,0 +1,33 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen (2) + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_manufact_id,sum_sales,_w0] + CometExchange [i_manufact_id] #1 + CometHashAggregate [i_manufact_id,sum_sales,_w0,d_qoy,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_manufact_id,d_qoy] #2 + CometHashAggregate [i_manufact_id,d_qoy,sum,ss_sales_price] + CometProject [i_manufact_id,ss_sales_price,d_qoy] + CometBroadcastHashJoin [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy,s_store_sk] + CometProject [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + CometBroadcastHashJoin [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_qoy] + CometProject [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_manufact_id,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [i_item_sk,i_manufact_id] + CometFilter [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_qoy] #4 + CometProject [d_date_sk,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_qoy] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometScan parquet spark_catalog.default.store [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/explain.txt new file mode 100644 index 0000000000..40376430a5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/explain.txt @@ -0,0 +1,376 @@ +== Physical Plan == +* ColumnarToRow (56) ++- CometTakeOrderedAndProject (55) + +- CometHashAggregate (54) + +- CometExchange (53) + +- CometHashAggregate (52) + +- CometHashAggregate (51) + +- CometExchange (50) + +- CometHashAggregate (49) + +- CometProject (48) + +- CometBroadcastHashJoin (47) + :- CometProject (42) + : +- CometBroadcastHashJoin (41) + : :- CometProject (37) + : : +- CometBroadcastHashJoin (36) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometHashAggregate (27) + : : : : +- CometExchange (26) + : : : : +- CometHashAggregate (25) + : : : : +- CometProject (24) + : : : : +- CometBroadcastHashJoin (23) + : : : : :- CometProject (19) + : : : : : +- CometBroadcastHashJoin (18) + : : : : : :- CometProject (13) + : : : : : : +- CometBroadcastHashJoin (12) + : : : : : : :- CometUnion (7) + : : : : : : : :- CometProject (3) + : : : : : : : : +- CometFilter (2) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : : : +- CometProject (6) + : : : : : : : +- CometFilter (5) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (4) + : : : : : : +- CometBroadcastExchange (11) + : : : : : : +- CometProject (10) + : : : : : : +- CometFilter (9) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (8) + : : : : : +- CometBroadcastExchange (17) + : : : : : +- CometProject (16) + : : : : : +- CometFilter (15) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + : : : : +- CometBroadcastExchange (22) + : : : : +- CometFilter (21) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (20) + : : : +- CometBroadcastExchange (30) + : : : +- CometFilter (29) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (28) + : : +- CometBroadcastExchange (35) + : : +- CometFilter (34) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (33) + : +- CometBroadcastExchange (40) + : +- CometFilter (39) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (38) + +- CometBroadcastExchange (46) + +- CometProject (45) + +- CometFilter (44) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (43) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_bill_customer_sk#1)) + +(3) CometProject +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Arguments: [sold_date_sk#4, customer_sk#5, item_sk#6], [cs_sold_date_sk#3 AS sold_date_sk#4, cs_bill_customer_sk#1 AS customer_sk#5, cs_item_sk#2 AS item_sk#6] + +(4) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Arguments: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] + +(5) CometFilter +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Condition : (isnotnull(ws_item_sk#7) AND isnotnull(ws_bill_customer_sk#8)) + +(6) CometProject +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Arguments: [sold_date_sk#10, customer_sk#11, item_sk#12], [ws_sold_date_sk#9 AS sold_date_sk#10, ws_bill_customer_sk#8 AS customer_sk#11, ws_item_sk#7 AS item_sk#12] + +(7) CometUnion +Child 0 Input [3]: [sold_date_sk#4, customer_sk#5, item_sk#6] +Child 1 Input [3]: [sold_date_sk#10, customer_sk#11, item_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Arguments: [i_item_sk#13, i_class#14, i_category#15] + +(9) CometFilter +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women )) AND (i_class#14 = maternity )) AND isnotnull(i_item_sk#13)) + +(10) CometProject +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Arguments: [i_item_sk#13], [i_item_sk#13] + +(11) CometBroadcastExchange +Input [1]: [i_item_sk#13] +Arguments: [i_item_sk#13] + +(12) CometBroadcastHashJoin +Left output [3]: [sold_date_sk#4, customer_sk#5, item_sk#6] +Right output [1]: [i_item_sk#13] +Arguments: [item_sk#6], [i_item_sk#13], Inner, BuildRight + +(13) CometProject +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] +Arguments: [sold_date_sk#4, customer_sk#5], [sold_date_sk#4, customer_sk#5] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16, d_year#17, d_moy#18] + +(15) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 12)) AND (d_year#17 = 1998)) AND isnotnull(d_date_sk#16)) + +(16) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(18) CometBroadcastHashJoin +Left output [2]: [sold_date_sk#4, customer_sk#5] +Right output [1]: [d_date_sk#16] +Arguments: [sold_date_sk#4], [d_date_sk#16], Inner, BuildRight + +(19) CometProject +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#16] +Arguments: [customer_sk#5], [customer_sk#5] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: [c_customer_sk#19, c_current_addr_sk#20] + +(21) CometFilter +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) + +(22) CometBroadcastExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: [c_customer_sk#19, c_current_addr_sk#20] + +(23) CometBroadcastHashJoin +Left output [1]: [customer_sk#5] +Right output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: [customer_sk#5], [c_customer_sk#19], Inner, BuildRight + +(24) CometProject +Input [3]: [customer_sk#5, c_customer_sk#19, c_current_addr_sk#20] +Arguments: [c_customer_sk#19, c_current_addr_sk#20], [c_customer_sk#19, c_current_addr_sk#20] + +(25) CometHashAggregate +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] + +(26) CometExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: hashpartitioning(c_customer_sk#19, c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] + +(28) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(29) CometFilter +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#21) + +(30) CometBroadcastExchange +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(31) CometBroadcastHashJoin +Left output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Right output [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [c_customer_sk#19], [ss_customer_sk#21], Inner, BuildRight + +(32) CometProject +Input [5]: [c_customer_sk#19, c_current_addr_sk#20, ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23], [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(33) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [ca_address_sk#24, ca_county#25, ca_state#26] + +(34) CometFilter +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Condition : ((isnotnull(ca_address_sk#24) AND isnotnull(ca_county#25)) AND isnotnull(ca_state#26)) + +(35) CometBroadcastExchange +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [ca_address_sk#24, ca_county#25, ca_state#26] + +(36) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23] +Right output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [c_current_addr_sk#20], [ca_address_sk#24], Inner, BuildRight + +(37) CometProject +Input [7]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26], [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26] + +(38) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_county#27, s_state#28] +Arguments: [s_county#27, s_state#28] + +(39) CometFilter +Input [2]: [s_county#27, s_state#28] +Condition : (isnotnull(s_county#27) AND isnotnull(s_state#28)) + +(40) CometBroadcastExchange +Input [2]: [s_county#27, s_state#28] +Arguments: [s_county#27, s_state#28] + +(41) CometBroadcastHashJoin +Left output [5]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26] +Right output [2]: [s_county#27, s_state#28] +Arguments: [ca_county#25, ca_state#26], [s_county#27, s_state#28], Inner, BuildRight + +(42) CometProject +Input [7]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26, s_county#27, s_state#28] +Arguments: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23], [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(43) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#29, d_month_seq#30] +Arguments: [d_date_sk#29, d_month_seq#30] + +(44) CometFilter +Input [2]: [d_date_sk#29, d_month_seq#30] +Condition : (((isnotnull(d_month_seq#30) AND (d_month_seq#30 >= Subquery scalar-subquery#31, [id=#32])) AND (d_month_seq#30 <= Subquery scalar-subquery#33, [id=#34])) AND isnotnull(d_date_sk#29)) + +(45) CometProject +Input [2]: [d_date_sk#29, d_month_seq#30] +Arguments: [d_date_sk#29], [d_date_sk#29] + +(46) CometBroadcastExchange +Input [1]: [d_date_sk#29] +Arguments: [d_date_sk#29] + +(47) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23] +Right output [1]: [d_date_sk#29] +Arguments: [ss_sold_date_sk#23], [d_date_sk#29], Inner, BuildRight + +(48) CometProject +Input [4]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, d_date_sk#29] +Arguments: [c_customer_sk#19, ss_ext_sales_price#22], [c_customer_sk#19, ss_ext_sales_price#22] + +(49) CometHashAggregate +Input [2]: [c_customer_sk#19, ss_ext_sales_price#22] +Keys [1]: [c_customer_sk#19] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#22))] + +(50) CometExchange +Input [2]: [c_customer_sk#19, sum#35] +Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(51) CometHashAggregate +Input [2]: [c_customer_sk#19, sum#35] +Keys [1]: [c_customer_sk#19] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#22))] + +(52) CometHashAggregate +Input [1]: [segment#36] +Keys [1]: [segment#36] +Functions [1]: [partial_count(1)] + +(53) CometExchange +Input [2]: [segment#36, count#37] +Arguments: hashpartitioning(segment#36, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(54) CometHashAggregate +Input [2]: [segment#36, count#37] +Keys [1]: [segment#36] +Functions [1]: [count(1)] + +(55) CometTakeOrderedAndProject +Input [3]: [segment#36, num_customers#38, segment_base#39] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[segment#36 ASC NULLS FIRST,num_customers#38 ASC NULLS FIRST], output=[segment#36,num_customers#38,segment_base#39]), [segment#36, num_customers#38, segment_base#39], 100, [segment#36 ASC NULLS FIRST, num_customers#38 ASC NULLS FIRST], [segment#36, num_customers#38, segment_base#39] + +(56) ColumnarToRow [codegen id : 1] +Input [3]: [segment#36, num_customers#38, segment_base#39] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#31, [id=#32] +* ColumnarToRow (63) ++- CometHashAggregate (62) + +- CometExchange (61) + +- CometHashAggregate (60) + +- CometProject (59) + +- CometFilter (58) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (57) + + +(57) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_month_seq#40, d_year#41, d_moy#42] +Arguments: [d_month_seq#40, d_year#41, d_moy#42] + +(58) CometFilter +Input [3]: [d_month_seq#40, d_year#41, d_moy#42] +Condition : (((isnotnull(d_year#41) AND isnotnull(d_moy#42)) AND (d_year#41 = 1998)) AND (d_moy#42 = 12)) + +(59) CometProject +Input [3]: [d_month_seq#40, d_year#41, d_moy#42] +Arguments: [(d_month_seq + 1)#43], [(d_month_seq#40 + 1) AS (d_month_seq + 1)#43] + +(60) CometHashAggregate +Input [1]: [(d_month_seq + 1)#43] +Keys [1]: [(d_month_seq + 1)#43] +Functions: [] + +(61) CometExchange +Input [1]: [(d_month_seq + 1)#43] +Arguments: hashpartitioning((d_month_seq + 1)#43, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(62) CometHashAggregate +Input [1]: [(d_month_seq + 1)#43] +Keys [1]: [(d_month_seq + 1)#43] +Functions: [] + +(63) ColumnarToRow [codegen id : 1] +Input [1]: [(d_month_seq + 1)#43] + +Subquery:2 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#33, [id=#34] +* ColumnarToRow (70) ++- CometHashAggregate (69) + +- CometExchange (68) + +- CometHashAggregate (67) + +- CometProject (66) + +- CometFilter (65) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (64) + + +(64) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_month_seq#44, d_year#45, d_moy#46] +Arguments: [d_month_seq#44, d_year#45, d_moy#46] + +(65) CometFilter +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] +Condition : (((isnotnull(d_year#45) AND isnotnull(d_moy#46)) AND (d_year#45 = 1998)) AND (d_moy#46 = 12)) + +(66) CometProject +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] +Arguments: [(d_month_seq + 3)#47], [(d_month_seq#44 + 3) AS (d_month_seq + 3)#47] + +(67) CometHashAggregate +Input [1]: [(d_month_seq + 3)#47] +Keys [1]: [(d_month_seq + 3)#47] +Functions: [] + +(68) CometExchange +Input [1]: [(d_month_seq + 3)#47] +Arguments: hashpartitioning((d_month_seq + 3)#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(69) CometHashAggregate +Input [1]: [(d_month_seq + 3)#47] +Keys [1]: [(d_month_seq + 3)#47] +Functions: [] + +(70) ColumnarToRow [codegen id : 1] +Input [1]: [(d_month_seq + 3)#47] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4956f2825d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_datafusion/simplified.txt @@ -0,0 +1,78 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [segment,num_customers,segment_base] + CometHashAggregate [segment,num_customers,segment_base,count,count(1)] + CometExchange [segment] #1 + CometHashAggregate [segment,count] + CometHashAggregate [segment,c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [c_customer_sk] #2 + CometHashAggregate [c_customer_sk,sum,ss_ext_sales_price] + CometProject [c_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ca_county,ca_state,s_county,s_state] + CometProject [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ca_county,ca_state] + CometBroadcastHashJoin [c_customer_sk,c_current_addr_sk,ss_ext_sales_price,ss_sold_date_sk,ca_address_sk,ca_county,ca_state] + CometProject [c_customer_sk,c_current_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_addr_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometHashAggregate [c_customer_sk,c_current_addr_sk] + CometExchange [c_customer_sk,c_current_addr_sk] #3 + CometHashAggregate [c_customer_sk,c_current_addr_sk] + CometProject [c_customer_sk,c_current_addr_sk] + CometBroadcastHashJoin [customer_sk,c_customer_sk,c_current_addr_sk] + CometProject [customer_sk] + CometBroadcastHashJoin [sold_date_sk,customer_sk,d_date_sk] + CometProject [sold_date_sk,customer_sk] + CometBroadcastHashJoin [sold_date_sk,customer_sk,item_sk,i_item_sk] + CometUnion [sold_date_sk,customer_sk,item_sk] + CometProject [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [sold_date_sk,customer_sk,item_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + CometProject [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [sold_date_sk,customer_sk,item_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk] #4 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #6 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] #7 + CometFilter [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state] #8 + CometFilter [ca_address_sk,ca_county,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county,ca_state] + CometBroadcastExchange [s_county,s_state] #9 + CometFilter [s_county,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_county,s_state] + CometBroadcastExchange [d_date_sk] #10 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [(d_month_seq + 1)] + CometExchange [(d_month_seq + 1)] #11 + CometHashAggregate [(d_month_seq + 1)] + CometProject [d_month_seq] [(d_month_seq + 1)] + CometFilter [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [(d_month_seq + 3)] + CometExchange [(d_month_seq + 3)] #12 + CometHashAggregate [(d_month_seq + 3)] + CometProject [d_month_seq] [(d_month_seq + 3)] + CometFilter [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ce37d40247 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/explain.txt @@ -0,0 +1,412 @@ +== Physical Plan == +* ColumnarToRow (56) ++- CometTakeOrderedAndProject (55) + +- CometHashAggregate (54) + +- CometExchange (53) + +- CometHashAggregate (52) + +- CometHashAggregate (51) + +- CometExchange (50) + +- CometHashAggregate (49) + +- CometProject (48) + +- CometBroadcastHashJoin (47) + :- CometProject (42) + : +- CometBroadcastHashJoin (41) + : :- CometProject (37) + : : +- CometBroadcastHashJoin (36) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometHashAggregate (27) + : : : : +- CometExchange (26) + : : : : +- CometHashAggregate (25) + : : : : +- CometProject (24) + : : : : +- CometBroadcastHashJoin (23) + : : : : :- CometProject (19) + : : : : : +- CometBroadcastHashJoin (18) + : : : : : :- CometProject (13) + : : : : : : +- CometBroadcastHashJoin (12) + : : : : : : :- CometUnion (7) + : : : : : : : :- CometProject (3) + : : : : : : : : +- CometFilter (2) + : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : +- CometProject (6) + : : : : : : : +- CometFilter (5) + : : : : : : : +- CometScan parquet spark_catalog.default.web_sales (4) + : : : : : : +- CometBroadcastExchange (11) + : : : : : : +- CometProject (10) + : : : : : : +- CometFilter (9) + : : : : : : +- CometScan parquet spark_catalog.default.item (8) + : : : : : +- CometBroadcastExchange (17) + : : : : : +- CometProject (16) + : : : : : +- CometFilter (15) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (14) + : : : : +- CometBroadcastExchange (22) + : : : : +- CometFilter (21) + : : : : +- CometScan parquet spark_catalog.default.customer (20) + : : : +- CometBroadcastExchange (30) + : : : +- CometFilter (29) + : : : +- CometScan parquet spark_catalog.default.store_sales (28) + : : +- CometBroadcastExchange (35) + : : +- CometFilter (34) + : : +- CometScan parquet spark_catalog.default.customer_address (33) + : +- CometBroadcastExchange (40) + : +- CometFilter (39) + : +- CometScan parquet spark_catalog.default.store (38) + +- CometBroadcastExchange (46) + +- CometProject (45) + +- CometFilter (44) + +- CometScan parquet spark_catalog.default.date_dim (43) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_bill_customer_sk#1)) + +(3) CometProject +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Arguments: [sold_date_sk#4, customer_sk#5, item_sk#6], [cs_sold_date_sk#3 AS sold_date_sk#4, cs_bill_customer_sk#1 AS customer_sk#5, cs_item_sk#2 AS item_sk#6] + +(4) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#9)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Condition : (isnotnull(ws_item_sk#7) AND isnotnull(ws_bill_customer_sk#8)) + +(6) CometProject +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Arguments: [sold_date_sk#10, customer_sk#11, item_sk#12], [ws_sold_date_sk#9 AS sold_date_sk#10, ws_bill_customer_sk#8 AS customer_sk#11, ws_item_sk#7 AS item_sk#12] + +(7) CometUnion +Child 0 Input [3]: [sold_date_sk#4, customer_sk#5, item_sk#6] +Child 1 Input [3]: [sold_date_sk#10, customer_sk#11, item_sk#12] + +(8) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women ), EqualTo(i_class,maternity ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women )) AND (i_class#14 = maternity )) AND isnotnull(i_item_sk#13)) + +(10) CometProject +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Arguments: [i_item_sk#13], [i_item_sk#13] + +(11) CometBroadcastExchange +Input [1]: [i_item_sk#13] +Arguments: [i_item_sk#13] + +(12) CometBroadcastHashJoin +Left output [3]: [sold_date_sk#4, customer_sk#5, item_sk#6] +Right output [1]: [i_item_sk#13] +Arguments: [item_sk#6], [i_item_sk#13], Inner, BuildRight + +(13) CometProject +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] +Arguments: [sold_date_sk#4, customer_sk#5], [sold_date_sk#4, customer_sk#5] + +(14) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 12)) AND (d_year#17 = 1998)) AND isnotnull(d_date_sk#16)) + +(16) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(18) CometBroadcastHashJoin +Left output [2]: [sold_date_sk#4, customer_sk#5] +Right output [1]: [d_date_sk#16] +Arguments: [sold_date_sk#4], [d_date_sk#16], Inner, BuildRight + +(19) CometProject +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#16] +Arguments: [customer_sk#5], [customer_sk#5] + +(20) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(21) CometFilter +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) + +(22) CometBroadcastExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: [c_customer_sk#19, c_current_addr_sk#20] + +(23) CometBroadcastHashJoin +Left output [1]: [customer_sk#5] +Right output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: [customer_sk#5], [c_customer_sk#19], Inner, BuildRight + +(24) CometProject +Input [3]: [customer_sk#5, c_customer_sk#19, c_current_addr_sk#20] +Arguments: [c_customer_sk#19, c_current_addr_sk#20], [c_customer_sk#19, c_current_addr_sk#20] + +(25) CometHashAggregate +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] + +(26) CometExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: hashpartitioning(c_customer_sk#19, c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] + +(28) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#23)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(29) CometFilter +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#21) + +(30) CometBroadcastExchange +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(31) CometBroadcastHashJoin +Left output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Right output [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [c_customer_sk#19], [ss_customer_sk#21], Inner, BuildRight + +(32) CometProject +Input [5]: [c_customer_sk#19, c_current_addr_sk#20, ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23], [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(33) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(34) CometFilter +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Condition : ((isnotnull(ca_address_sk#24) AND isnotnull(ca_county#25)) AND isnotnull(ca_state#26)) + +(35) CometBroadcastExchange +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [ca_address_sk#24, ca_county#25, ca_state#26] + +(36) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23] +Right output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [c_current_addr_sk#20], [ca_address_sk#24], Inner, BuildRight + +(37) CometProject +Input [7]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26], [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26] + +(38) CometScan parquet spark_catalog.default.store +Output [2]: [s_county#27, s_state#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(39) CometFilter +Input [2]: [s_county#27, s_state#28] +Condition : (isnotnull(s_county#27) AND isnotnull(s_state#28)) + +(40) CometBroadcastExchange +Input [2]: [s_county#27, s_state#28] +Arguments: [s_county#27, s_state#28] + +(41) CometBroadcastHashJoin +Left output [5]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26] +Right output [2]: [s_county#27, s_state#28] +Arguments: [ca_county#25, ca_state#26], [s_county#27, s_state#28], Inner, BuildRight + +(42) CometProject +Input [7]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26, s_county#27, s_state#28] +Arguments: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23], [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(43) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#29, d_month_seq#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) CometFilter +Input [2]: [d_date_sk#29, d_month_seq#30] +Condition : (((isnotnull(d_month_seq#30) AND (d_month_seq#30 >= Subquery scalar-subquery#31, [id=#32])) AND (d_month_seq#30 <= Subquery scalar-subquery#33, [id=#34])) AND isnotnull(d_date_sk#29)) + +(45) CometProject +Input [2]: [d_date_sk#29, d_month_seq#30] +Arguments: [d_date_sk#29], [d_date_sk#29] + +(46) CometBroadcastExchange +Input [1]: [d_date_sk#29] +Arguments: [d_date_sk#29] + +(47) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23] +Right output [1]: [d_date_sk#29] +Arguments: [ss_sold_date_sk#23], [d_date_sk#29], Inner, BuildRight + +(48) CometProject +Input [4]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, d_date_sk#29] +Arguments: [c_customer_sk#19, ss_ext_sales_price#22], [c_customer_sk#19, ss_ext_sales_price#22] + +(49) CometHashAggregate +Input [2]: [c_customer_sk#19, ss_ext_sales_price#22] +Keys [1]: [c_customer_sk#19] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#22))] + +(50) CometExchange +Input [2]: [c_customer_sk#19, sum#35] +Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(51) CometHashAggregate +Input [2]: [c_customer_sk#19, sum#35] +Keys [1]: [c_customer_sk#19] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#22))] + +(52) CometHashAggregate +Input [1]: [segment#36] +Keys [1]: [segment#36] +Functions [1]: [partial_count(1)] + +(53) CometExchange +Input [2]: [segment#36, count#37] +Arguments: hashpartitioning(segment#36, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(54) CometHashAggregate +Input [2]: [segment#36, count#37] +Keys [1]: [segment#36] +Functions [1]: [count(1)] + +(55) CometTakeOrderedAndProject +Input [3]: [segment#36, num_customers#38, segment_base#39] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[segment#36 ASC NULLS FIRST,num_customers#38 ASC NULLS FIRST], output=[segment#36,num_customers#38,segment_base#39]), [segment#36, num_customers#38, segment_base#39], 100, [segment#36 ASC NULLS FIRST, num_customers#38 ASC NULLS FIRST], [segment#36, num_customers#38, segment_base#39] + +(56) ColumnarToRow [codegen id : 1] +Input [3]: [segment#36, num_customers#38, segment_base#39] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#31, [id=#32] +* ColumnarToRow (63) ++- CometHashAggregate (62) + +- CometExchange (61) + +- CometHashAggregate (60) + +- CometProject (59) + +- CometFilter (58) + +- CometScan parquet spark_catalog.default.date_dim (57) + + +(57) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#40, d_year#41, d_moy#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(58) CometFilter +Input [3]: [d_month_seq#40, d_year#41, d_moy#42] +Condition : (((isnotnull(d_year#41) AND isnotnull(d_moy#42)) AND (d_year#41 = 1998)) AND (d_moy#42 = 12)) + +(59) CometProject +Input [3]: [d_month_seq#40, d_year#41, d_moy#42] +Arguments: [(d_month_seq + 1)#43], [(d_month_seq#40 + 1) AS (d_month_seq + 1)#43] + +(60) CometHashAggregate +Input [1]: [(d_month_seq + 1)#43] +Keys [1]: [(d_month_seq + 1)#43] +Functions: [] + +(61) CometExchange +Input [1]: [(d_month_seq + 1)#43] +Arguments: hashpartitioning((d_month_seq + 1)#43, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(62) CometHashAggregate +Input [1]: [(d_month_seq + 1)#43] +Keys [1]: [(d_month_seq + 1)#43] +Functions: [] + +(63) ColumnarToRow [codegen id : 1] +Input [1]: [(d_month_seq + 1)#43] + +Subquery:2 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#33, [id=#34] +* ColumnarToRow (70) ++- CometHashAggregate (69) + +- CometExchange (68) + +- CometHashAggregate (67) + +- CometProject (66) + +- CometFilter (65) + +- CometScan parquet spark_catalog.default.date_dim (64) + + +(64) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#44, d_year#45, d_moy#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(65) CometFilter +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] +Condition : (((isnotnull(d_year#45) AND isnotnull(d_moy#46)) AND (d_year#45 = 1998)) AND (d_moy#46 = 12)) + +(66) CometProject +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] +Arguments: [(d_month_seq + 3)#47], [(d_month_seq#44 + 3) AS (d_month_seq + 3)#47] + +(67) CometHashAggregate +Input [1]: [(d_month_seq + 3)#47] +Keys [1]: [(d_month_seq + 3)#47] +Functions: [] + +(68) CometExchange +Input [1]: [(d_month_seq + 3)#47] +Arguments: hashpartitioning((d_month_seq + 3)#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(69) CometHashAggregate +Input [1]: [(d_month_seq + 3)#47] +Keys [1]: [(d_month_seq + 3)#47] +Functions: [] + +(70) ColumnarToRow [codegen id : 1] +Input [1]: [(d_month_seq + 3)#47] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9f3c5c133f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q54.native_iceberg_compat/simplified.txt @@ -0,0 +1,78 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [segment,num_customers,segment_base] + CometHashAggregate [segment,num_customers,segment_base,count,count(1)] + CometExchange [segment] #1 + CometHashAggregate [segment,count] + CometHashAggregate [segment,c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [c_customer_sk] #2 + CometHashAggregate [c_customer_sk,sum,ss_ext_sales_price] + CometProject [c_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ca_county,ca_state,s_county,s_state] + CometProject [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ca_county,ca_state] + CometBroadcastHashJoin [c_customer_sk,c_current_addr_sk,ss_ext_sales_price,ss_sold_date_sk,ca_address_sk,ca_county,ca_state] + CometProject [c_customer_sk,c_current_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_addr_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometHashAggregate [c_customer_sk,c_current_addr_sk] + CometExchange [c_customer_sk,c_current_addr_sk] #3 + CometHashAggregate [c_customer_sk,c_current_addr_sk] + CometProject [c_customer_sk,c_current_addr_sk] + CometBroadcastHashJoin [customer_sk,c_customer_sk,c_current_addr_sk] + CometProject [customer_sk] + CometBroadcastHashJoin [sold_date_sk,customer_sk,d_date_sk] + CometProject [sold_date_sk,customer_sk] + CometBroadcastHashJoin [sold_date_sk,customer_sk,item_sk,i_item_sk] + CometUnion [sold_date_sk,customer_sk,item_sk] + CometProject [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [sold_date_sk,customer_sk,item_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + CometProject [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [sold_date_sk,customer_sk,item_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk] #4 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #6 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] #7 + CometFilter [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state] #8 + CometFilter [ca_address_sk,ca_county,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state] + CometBroadcastExchange [s_county,s_state] #9 + CometFilter [s_county,s_state] + CometScan parquet spark_catalog.default.store [s_county,s_state] + CometBroadcastExchange [d_date_sk] #10 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [(d_month_seq + 1)] + CometExchange [(d_month_seq + 1)] #11 + CometHashAggregate [(d_month_seq + 1)] + CometProject [d_month_seq] [(d_month_seq + 1)] + CometFilter [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [(d_month_seq + 3)] + CometExchange [(d_month_seq + 3)] #12 + CometHashAggregate [(d_month_seq + 3)] + CometProject [d_month_seq] [(d_month_seq + 3)] + CometFilter [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/explain.txt new file mode 100644 index 0000000000..b06aa5ec9b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1], [d_date_sk#1] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [1]: [d_date_sk#1] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5], [ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 28)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) CometHashAggregate +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [3]: [i_brand#9, i_brand_id#8, sum#11] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [3]: [i_brand#9, i_brand_id#8, sum#11] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [3]: [brand_id#12, brand#13, ext_price#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ext_price#14 DESC NULLS LAST,brand_id#12 ASC NULLS FIRST], output=[brand_id#12,brand#13,ext_price#14]), [brand_id#12, brand#13, ext_price#14], 100, [ext_price#14 DESC NULLS LAST, brand_id#12 ASC NULLS FIRST], [brand_id#12, brand#13, ext_price#14] + +(19) ColumnarToRow [codegen id : 1] +Input [3]: [brand_id#12, brand#13, ext_price#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/simplified.txt new file mode 100644 index 0000000000..737a74576d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [brand_id,brand,ext_price] + CometHashAggregate [brand_id,brand,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_brand,i_brand_id] #1 + CometHashAggregate [i_brand,i_brand_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7854758c23 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/explain.txt @@ -0,0 +1,111 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.date_dim (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometScan parquet spark_catalog.default.store_sales (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometScan parquet spark_catalog.default.item (9) + + +(1) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1], [d_date_sk#1] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [1]: [d_date_sk#1] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5], [ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 28)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) CometHashAggregate +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(16) CometExchange +Input [3]: [i_brand#9, i_brand_id#8, sum#11] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [3]: [i_brand#9, i_brand_id#8, sum#11] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(18) CometTakeOrderedAndProject +Input [3]: [brand_id#12, brand#13, ext_price#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ext_price#14 DESC NULLS LAST,brand_id#12 ASC NULLS FIRST], output=[brand_id#12,brand#13,ext_price#14]), [brand_id#12, brand#13, ext_price#14], 100, [ext_price#14 DESC NULLS LAST, brand_id#12 ASC NULLS FIRST], [brand_id#12, brand#13, ext_price#14] + +(19) ColumnarToRow [codegen id : 1] +Input [3]: [brand_id#12, brand#13, ext_price#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..999c8a6c4c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q55.native_iceberg_compat/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [brand_id,brand,ext_price] + CometHashAggregate [brand_id,brand,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_brand,i_brand_id] #1 + CometHashAggregate [i_brand,i_brand_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/explain.txt new file mode 100644 index 0000000000..fbf528bb90 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/explain.txt @@ -0,0 +1,258 @@ +== Physical Plan == +* ColumnarToRow (49) ++- CometTakeOrderedAndProject (48) + +- CometHashAggregate (47) + +- CometExchange (46) + +- CometHashAggregate (45) + +- CometUnion (44) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- CometHashAggregate (43) + +- ReusedExchange (42) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5, d_year#6, d_moy#7] + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8, ca_gmt_offset#9] + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(16) CometFilter +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_id#12, i_color#13] +Arguments: [i_item_id#12, i_color#13] + +(18) CometFilter +Input [2]: [i_item_id#12, i_color#13] +Condition : i_color#13 IN (slate ,blanched ,burnished ) + +(19) CometProject +Input [2]: [i_item_id#12, i_color#13] +Arguments: [i_item_id#12], [i_item_id#12] + +(20) CometBroadcastExchange +Input [1]: [i_item_id#12] +Arguments: [i_item_id#12] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_item_id#11] +Right output [1]: [i_item_id#12] +Arguments: [i_item_id#11], [i_item_id#12], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Arguments: [ss_ext_sales_price#3, i_item_id#11], [ss_ext_sales_price#3, i_item_id#11] + +(25) CometHashAggregate +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(26) CometExchange +Input [2]: [i_item_id#11, sum#14] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [i_item_id#11, sum#14] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] + +(28) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(29) CometFilter +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_bill_addr_sk#15) AND isnotnull(cs_item_sk#16)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(31) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(32) CometProject +Input [5]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17], [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#20] + +(34) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] +Right output [1]: [ca_address_sk#20] +Arguments: [cs_bill_addr_sk#15], [ca_address_sk#20], Inner, BuildRight + +(35) CometProject +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, ca_address_sk#20] +Arguments: [cs_item_sk#16, cs_ext_sales_price#17], [cs_item_sk#16, cs_ext_sales_price#17] + +(36) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#21, i_item_id#22] + +(37) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#16, cs_ext_sales_price#17] +Right output [2]: [i_item_sk#21, i_item_id#22] +Arguments: [cs_item_sk#16], [i_item_sk#21], Inner, BuildRight + +(38) CometProject +Input [4]: [cs_item_sk#16, cs_ext_sales_price#17, i_item_sk#21, i_item_id#22] +Arguments: [cs_ext_sales_price#17, i_item_id#22], [cs_ext_sales_price#17, i_item_id#22] + +(39) CometHashAggregate +Input [2]: [cs_ext_sales_price#17, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#17))] + +(40) CometExchange +Input [2]: [i_item_id#22, sum#23] +Arguments: hashpartitioning(i_item_id#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(41) CometHashAggregate +Input [2]: [i_item_id#22, sum#23] +Keys [1]: [i_item_id#22] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(42) ReusedExchange [Reuses operator id: 26] +Output [2]: [i_item_id#24, sum#25] + +(43) CometHashAggregate +Input [2]: [i_item_id#24, sum#25] +Keys [1]: [i_item_id#24] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#26))] + +(44) CometUnion +Child 0 Input [2]: [i_item_id#11, total_sales#27] +Child 1 Input [2]: [i_item_id#22, total_sales#28] +Child 2 Input [2]: [i_item_id#24, total_sales#29] + +(45) CometHashAggregate +Input [2]: [i_item_id#11, total_sales#27] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#27)] + +(46) CometExchange +Input [3]: [i_item_id#11, sum#30, isEmpty#31] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(47) CometHashAggregate +Input [3]: [i_item_id#11, sum#30, isEmpty#31] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#27)] + +(48) CometTakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#32] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_sales#32 ASC NULLS FIRST], output=[i_item_id#11,total_sales#32]), [i_item_id#11, total_sales#32], 100, [total_sales#32 ASC NULLS FIRST], [i_item_id#11, total_sales#32] + +(49) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_id#11, total_sales#32] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/simplified.txt new file mode 100644 index 0000000000..56464c9b57 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_datafusion/simplified.txt @@ -0,0 +1,51 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,isEmpty,sum(total_sales)] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,isEmpty,total_sales] + CometUnion [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id] #2 + CometHashAggregate [i_item_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_id] + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [i_item_id] #6 + CometProject [i_item_id] + CometFilter [i_item_id,i_color] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_id,i_color] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id] #7 + CometHashAggregate [i_item_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ws_ext_sales_price))] + ReusedExchange [i_item_id,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..179c625503 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/explain.txt @@ -0,0 +1,344 @@ +== Physical Plan == +* ColumnarToRow (61) ++- CometTakeOrderedAndProject (60) + +- CometHashAggregate (59) + +- CometExchange (58) + +- CometHashAggregate (57) + +- CometUnion (56) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.customer_address (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometScan parquet spark_catalog.default.item (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometScan parquet spark_catalog.default.item (17) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- CometHashAggregate (55) + +- CometExchange (54) + +- CometHashAggregate (53) + +- CometProject (52) + +- CometBroadcastHashJoin (51) + :- CometProject (49) + : +- CometBroadcastHashJoin (48) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometFilter (43) + : : : +- CometScan parquet spark_catalog.default.web_sales (42) + : : +- ReusedExchange (44) + : +- ReusedExchange (47) + +- ReusedExchange (50) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_id#12, i_color#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_color, [blanched ,burnished ,slate ])] +ReadSchema: struct + +(18) CometFilter +Input [2]: [i_item_id#12, i_color#13] +Condition : i_color#13 IN (slate ,blanched ,burnished ) + +(19) CometProject +Input [2]: [i_item_id#12, i_color#13] +Arguments: [i_item_id#12], [i_item_id#12] + +(20) CometBroadcastExchange +Input [1]: [i_item_id#12] +Arguments: [i_item_id#12] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_item_id#11] +Right output [1]: [i_item_id#12] +Arguments: [i_item_id#11], [i_item_id#12], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Arguments: [ss_ext_sales_price#3, i_item_id#11], [ss_ext_sales_price#3, i_item_id#11] + +(25) CometHashAggregate +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(26) CometExchange +Input [2]: [i_item_id#11, sum#14] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [i_item_id#11, sum#14] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] + +(28) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#18)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(29) CometFilter +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_bill_addr_sk#15) AND isnotnull(cs_item_sk#16)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(31) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(32) CometProject +Input [5]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17], [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#20] + +(34) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] +Right output [1]: [ca_address_sk#20] +Arguments: [cs_bill_addr_sk#15], [ca_address_sk#20], Inner, BuildRight + +(35) CometProject +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, ca_address_sk#20] +Arguments: [cs_item_sk#16, cs_ext_sales_price#17], [cs_item_sk#16, cs_ext_sales_price#17] + +(36) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#21, i_item_id#22] + +(37) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#16, cs_ext_sales_price#17] +Right output [2]: [i_item_sk#21, i_item_id#22] +Arguments: [cs_item_sk#16], [i_item_sk#21], Inner, BuildRight + +(38) CometProject +Input [4]: [cs_item_sk#16, cs_ext_sales_price#17, i_item_sk#21, i_item_id#22] +Arguments: [cs_ext_sales_price#17, i_item_id#22], [cs_ext_sales_price#17, i_item_id#22] + +(39) CometHashAggregate +Input [2]: [cs_ext_sales_price#17, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#17))] + +(40) CometExchange +Input [2]: [i_item_id#22, sum#23] +Arguments: hashpartitioning(i_item_id#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(41) CometHashAggregate +Input [2]: [i_item_id#22, sum#23] +Keys [1]: [i_item_id#22] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(42) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#27)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(43) CometFilter +Input [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Condition : (isnotnull(ws_bill_addr_sk#25) AND isnotnull(ws_item_sk#24)) + +(44) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#28] + +(45) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Right output [1]: [d_date_sk#28] +Arguments: [ws_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight + +(46) CometProject +Input [5]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27, d_date_sk#28] +Arguments: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26], [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26] + +(47) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#29] + +(48) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26] +Right output [1]: [ca_address_sk#29] +Arguments: [ws_bill_addr_sk#25], [ca_address_sk#29], Inner, BuildRight + +(49) CometProject +Input [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ca_address_sk#29] +Arguments: [ws_item_sk#24, ws_ext_sales_price#26], [ws_item_sk#24, ws_ext_sales_price#26] + +(50) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#30, i_item_id#31] + +(51) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#24, ws_ext_sales_price#26] +Right output [2]: [i_item_sk#30, i_item_id#31] +Arguments: [ws_item_sk#24], [i_item_sk#30], Inner, BuildRight + +(52) CometProject +Input [4]: [ws_item_sk#24, ws_ext_sales_price#26, i_item_sk#30, i_item_id#31] +Arguments: [ws_ext_sales_price#26, i_item_id#31], [ws_ext_sales_price#26, i_item_id#31] + +(53) CometHashAggregate +Input [2]: [ws_ext_sales_price#26, i_item_id#31] +Keys [1]: [i_item_id#31] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#26))] + +(54) CometExchange +Input [2]: [i_item_id#31, sum#32] +Arguments: hashpartitioning(i_item_id#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(55) CometHashAggregate +Input [2]: [i_item_id#31, sum#32] +Keys [1]: [i_item_id#31] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#26))] + +(56) CometUnion +Child 0 Input [2]: [i_item_id#11, total_sales#33] +Child 1 Input [2]: [i_item_id#22, total_sales#34] +Child 2 Input [2]: [i_item_id#31, total_sales#35] + +(57) CometHashAggregate +Input [2]: [i_item_id#11, total_sales#33] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#33)] + +(58) CometExchange +Input [3]: [i_item_id#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [3]: [i_item_id#11, sum#36, isEmpty#37] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#33)] + +(60) CometTakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#38] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_sales#38 ASC NULLS FIRST], output=[i_item_id#11,total_sales#38]), [i_item_id#11, total_sales#38], 100, [total_sales#38 ASC NULLS FIRST], [i_item_id#11, total_sales#38] + +(61) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_id#11, total_sales#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..85e6da9350 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q56.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,isEmpty,sum(total_sales)] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,isEmpty,total_sales] + CometUnion [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id] #2 + CometHashAggregate [i_item_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_id] + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometBroadcastExchange [i_item_id] #6 + CometProject [i_item_id] + CometFilter [i_item_id,i_color] + CometScan parquet spark_catalog.default.item [i_item_id,i_color] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id] #7 + CometHashAggregate [i_item_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id] #8 + CometHashAggregate [i_item_id,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,i_item_sk,i_item_id] + CometProject [ws_item_sk,ws_ext_sales_price] + CometBroadcastHashJoin [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ca_address_sk] + CometProject [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + CometBroadcastHashJoin [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/explain.txt new file mode 100644 index 0000000000..1e3ab6de8f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`call_center` (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Arguments: [i_item_sk#1, i_brand#2, i_category#3] + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(4) CometFilter +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(5) CometBroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_item_sk#1], [cs_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [cs_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(14) CometFilter +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(15) CometBroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Right output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cs_call_center_sk#4], [cc_call_center_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] +Arguments: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12], [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] + +(18) CometHashAggregate +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] + +(19) CometExchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] + +(21) CometExchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] + +(24) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Arguments: [avg(_w0#15) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#17], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(27) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] +Condition : ((isnotnull(avg_monthly_sales#17) AND (avg_monthly_sales#17 > 0.000000)) AND CASE WHEN (avg_monthly_sales#17 > 0.000000) THEN ((abs((sum_sales#14 - avg_monthly_sales#17)) / avg_monthly_sales#17) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] + +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] + +(30) CometHashAggregate +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] +Keys [5]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22] +Functions [1]: [sum(UnscaledValue(cs_sales_price#24))] + +(31) CometExchange +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: hashpartitioning(i_category#18, i_brand#19, cc_name#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14], [i_category#18 ASC NULLS FIRST, i_brand#19 ASC NULLS FIRST, cc_name#20 ASC NULLS FIRST, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] + +(34) Window +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [rank(d_year#21, d_moy#22) windowspecdefinition(i_category#18, i_brand#19, cc_name#20, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#18, i_brand#19, cc_name#20], [d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#14 AS sum_sales#26, rn#25] +Input [7]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14, rn#25] + +(36) BroadcastExchange +Input [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#18, i_brand#19, cc_name#20, (rn#25 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] + +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(40) CometSort +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14], [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(42) Window +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#32], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#14 AS sum_sales#33, rn#32] +Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14, rn#32] + +(44) BroadcastExchange +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#32 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, sum_sales#26 AS psum#34, sum_sales#33 AS nsum#35] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26, i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] + +(47) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] +Arguments: 100, [(sum_sales#14 - avg_monthly_sales#17) ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9666c6c7ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] + WholeStageCodegen (7) + Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,cc_name] #1 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(cs_sales_price))] + CometExchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum,cs_sales_price] + CometProject [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy,cc_call_center_sk,cc_name] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_category] + CometBroadcastExchange [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] #3 + CometFilter [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [cc_call_center_sk,cc_name] #5 + CometFilter [cc_call_center_sk,cc_name] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,cc_name] #7 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..04a0ccb434 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.item (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.call_center (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#7)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(5) CometBroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_item_sk#1], [cs_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [cs_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] + +(13) CometScan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#11, cc_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(15) CometBroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Right output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cs_call_center_sk#4], [cc_call_center_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] +Arguments: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12], [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] + +(18) CometHashAggregate +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] + +(19) CometExchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] + +(21) CometExchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] + +(24) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Arguments: [avg(_w0#15) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#17], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(27) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] +Condition : ((isnotnull(avg_monthly_sales#17) AND (avg_monthly_sales#17 > 0.000000)) AND CASE WHEN (avg_monthly_sales#17 > 0.000000) THEN ((abs((sum_sales#14 - avg_monthly_sales#17)) / avg_monthly_sales#17) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] + +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] + +(30) CometHashAggregate +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] +Keys [5]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22] +Functions [1]: [sum(UnscaledValue(cs_sales_price#24))] + +(31) CometExchange +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: hashpartitioning(i_category#18, i_brand#19, cc_name#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14], [i_category#18 ASC NULLS FIRST, i_brand#19 ASC NULLS FIRST, cc_name#20 ASC NULLS FIRST, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] + +(34) Window +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [rank(d_year#21, d_moy#22) windowspecdefinition(i_category#18, i_brand#19, cc_name#20, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#18, i_brand#19, cc_name#20], [d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#14 AS sum_sales#26, rn#25] +Input [7]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14, rn#25] + +(36) BroadcastExchange +Input [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#18, i_brand#19, cc_name#20, (rn#25 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] + +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(40) CometSort +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14], [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(42) Window +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#32], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#14 AS sum_sales#33, rn#32] +Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14, rn#32] + +(44) BroadcastExchange +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#32 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, sum_sales#26 AS psum#34, sum_sales#33 AS nsum#35] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26, i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] + +(47) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] +Arguments: 100, [(sum_sales#14 - avg_monthly_sales#17) ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4d62f6ff44 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] + WholeStageCodegen (7) + Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,cc_name] #1 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(cs_sales_price))] + CometExchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum,cs_sales_price] + CometProject [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy,cc_call_center_sk,cc_name] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + CometBroadcastExchange [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] #3 + CometFilter [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [cc_call_center_sk,cc_name] #5 + CometFilter [cc_call_center_sk,cc_name] + CometScan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,cc_name] #7 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/explain.txt new file mode 100644 index 0000000000..f9908c19a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +* ColumnarToRow (33) ++- CometTakeOrderedAndProject (32) + +- CometProject (31) + +- CometBroadcastHashJoin (30) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometFilter (22) + : : +- CometHashAggregate (21) + : : +- CometExchange (20) + : : +- CometHashAggregate (19) + : : +- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (13) + : : +- CometProject (12) + : : +- CometFilter (11) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometHashAggregate (24) + : +- ReusedExchange (23) + +- ReusedExchange (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6, d_date#7] + +(9) CometFilter +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8, d_week_seq#9] + +(11) CometFilter +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) + +(12) CometProject +Input [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8], [d_date#8] + +(13) CometBroadcastExchange +Input [1]: [d_date#8] +Arguments: [d_date#8] + +(14) CometBroadcastHashJoin +Left output [2]: [d_date_sk#6, d_date#7] +Right output [1]: [d_date#8] +Arguments: [d_date#7], [d_date#8], LeftSemi, BuildRight + +(15) CometProject +Input [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(17) CometBroadcastHashJoin +Left output [3]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#6], Inner, BuildRight + +(18) CometProject +Input [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, d_date_sk#6] +Arguments: [ss_ext_sales_price#2, i_item_id#5], [ss_ext_sales_price#2, i_item_id#5] + +(19) CometHashAggregate +Input [2]: [ss_ext_sales_price#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(20) CometExchange +Input [2]: [i_item_id#5, sum#12] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [2]: [i_item_id#5, sum#12] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(22) CometFilter +Input [2]: [item_id#13, ss_item_rev#14] +Condition : isnotnull(ss_item_rev#14) + +(23) ReusedExchange [Reuses operator id: 20] +Output [2]: [i_item_id#15, sum#16] + +(24) CometHashAggregate +Input [2]: [i_item_id#15, sum#16] +Keys [1]: [i_item_id#15] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(25) CometFilter +Input [2]: [item_id#18, cs_item_rev#19] +Condition : isnotnull(cs_item_rev#19) + +(26) CometBroadcastExchange +Input [2]: [item_id#18, cs_item_rev#19] +Arguments: [item_id#18, cs_item_rev#19] + +(27) CometBroadcastHashJoin +Left output [2]: [item_id#13, ss_item_rev#14] +Right output [2]: [item_id#18, cs_item_rev#19] +Arguments: [item_id#13], [item_id#18], Inner, ((((cast(ss_item_rev#14 as decimal(19,3)) >= (0.9 * cs_item_rev#19)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= (1.1 * cs_item_rev#19))) AND (cast(cs_item_rev#19 as decimal(19,3)) >= (0.9 * ss_item_rev#14))) AND (cast(cs_item_rev#19 as decimal(20,3)) <= (1.1 * ss_item_rev#14))), BuildRight + +(28) CometProject +Input [4]: [item_id#13, ss_item_rev#14, item_id#18, cs_item_rev#19] +Arguments: [item_id#13, ss_item_rev#14, cs_item_rev#19], [item_id#13, ss_item_rev#14, cs_item_rev#19] + +(29) ReusedExchange [Reuses operator id: 26] +Output [2]: [item_id#20, ws_item_rev#21] + +(30) CometBroadcastHashJoin +Left output [3]: [item_id#13, ss_item_rev#14, cs_item_rev#19] +Right output [2]: [item_id#20, ws_item_rev#21] +Arguments: [item_id#13], [item_id#20], Inner, ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= (0.9 * ws_item_rev#21)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= (1.1 * ws_item_rev#21))) AND (cast(cs_item_rev#19 as decimal(19,3)) >= (0.9 * ws_item_rev#21))) AND (cast(cs_item_rev#19 as decimal(20,3)) <= (1.1 * ws_item_rev#21))) AND (cast(ws_item_rev#21 as decimal(19,3)) >= (0.9 * ss_item_rev#14))) AND (cast(ws_item_rev#21 as decimal(20,3)) <= (1.1 * ss_item_rev#14))) AND (cast(ws_item_rev#21 as decimal(19,3)) >= (0.9 * cs_item_rev#19))) AND (cast(ws_item_rev#21 as decimal(20,3)) <= (1.1 * cs_item_rev#19))), BuildRight + +(31) CometProject +Input [5]: [item_id#13, ss_item_rev#14, cs_item_rev#19, item_id#20, ws_item_rev#21] +Arguments: [item_id#13, ss_item_rev#14, ss_dev#22, cs_item_rev#19, cs_dev#23, ws_item_rev#21, ws_dev#24, average#25], [item_id#13, ss_item_rev#14, (((ss_item_rev#14 / ((ss_item_rev#14 + cs_item_rev#19) + ws_item_rev#21)) / 3) * 100) AS ss_dev#22, cs_item_rev#19, (((cs_item_rev#19 / ((ss_item_rev#14 + cs_item_rev#19) + ws_item_rev#21)) / 3) * 100) AS cs_dev#23, ws_item_rev#21, (((ws_item_rev#21 / ((ss_item_rev#14 + cs_item_rev#19) + ws_item_rev#21)) / 3) * 100) AS ws_dev#24, (((ss_item_rev#14 + cs_item_rev#19) + ws_item_rev#21) / 3) AS average#25] + +(32) CometTakeOrderedAndProject +Input [8]: [item_id#13, ss_item_rev#14, ss_dev#22, cs_item_rev#19, cs_dev#23, ws_item_rev#21, ws_dev#24, average#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[item_id#13 ASC NULLS FIRST,ss_item_rev#14 ASC NULLS FIRST], output=[item_id#13,ss_item_rev#14,ss_dev#22,cs_item_rev#19,cs_dev#23,ws_item_rev#21,ws_dev#24,average#25]), [item_id#13, ss_item_rev#14, ss_dev#22, cs_item_rev#19, cs_dev#23, ws_item_rev#21, ws_dev#24, average#25], 100, [item_id#13 ASC NULLS FIRST, ss_item_rev#14 ASC NULLS FIRST], [item_id#13, ss_item_rev#14, ss_dev#22, cs_item_rev#19, cs_dev#23, ws_item_rev#21, ws_dev#24, average#25] + +(33) ColumnarToRow [codegen id : 1] +Input [8]: [item_id#13, ss_item_rev#14, ss_dev#22, cs_item_rev#19, cs_dev#23, ws_item_rev#21, ws_dev#24, average#25] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 11 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* ColumnarToRow (37) ++- CometProject (36) + +- CometFilter (35) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (34) + + +(34) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date#26, d_week_seq#27] +Arguments: [d_date#26, d_week_seq#27] + +(35) CometFilter +Input [2]: [d_date#26, d_week_seq#27] +Condition : (isnotnull(d_date#26) AND (d_date#26 = 2000-01-03)) + +(36) CometProject +Input [2]: [d_date#26, d_week_seq#27] +Arguments: [d_week_seq#27], [d_week_seq#27] + +(37) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#27] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c0a83eeddd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + CometProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + CometBroadcastHashJoin [item_id,ss_item_rev,cs_item_rev,item_id,ws_item_rev] + CometProject [item_id,ss_item_rev,cs_item_rev] + CometBroadcastHashJoin [item_id,ss_item_rev,item_id,cs_item_rev] + CometFilter [item_id,ss_item_rev] + CometHashAggregate [item_id,ss_item_rev,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id] #2 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometBroadcastHashJoin [d_date_sk,d_date,d_date] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [d_date] #4 + CometProject [d_date] + CometFilter [d_date,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_date,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date,d_week_seq] + CometBroadcastExchange [item_id,cs_item_rev] #5 + CometFilter [item_id,cs_item_rev] + CometHashAggregate [item_id,cs_item_rev,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + ReusedExchange [i_item_id,sum] #1 + ReusedExchange [item_id,ws_item_rev] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7f69981903 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/explain.txt @@ -0,0 +1,332 @@ +== Physical Plan == +* ColumnarToRow (54) ++- CometTakeOrderedAndProject (53) + +- CometProject (52) + +- CometBroadcastHashJoin (51) + :- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometFilter (22) + : : +- CometHashAggregate (21) + : : +- CometExchange (20) + : : +- CometHashAggregate (19) + : : +- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.item (3) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (13) + : : +- CometProject (12) + : : +- CometFilter (11) + : : +- CometScan parquet spark_catalog.default.date_dim (10) + : +- CometBroadcastExchange (35) + : +- CometFilter (34) + : +- CometHashAggregate (33) + : +- CometExchange (32) + : +- CometHashAggregate (31) + : +- CometProject (30) + : +- CometBroadcastHashJoin (29) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometFilter (24) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (23) + : : +- ReusedExchange (25) + : +- ReusedExchange (28) + +- CometBroadcastExchange (50) + +- CometFilter (49) + +- CometHashAggregate (48) + +- CometExchange (47) + +- CometHashAggregate (46) + +- CometProject (45) + +- CometBroadcastHashJoin (44) + :- CometProject (42) + : +- CometBroadcastHashJoin (41) + : :- CometFilter (39) + : : +- CometScan parquet spark_catalog.default.web_sales (38) + : +- ReusedExchange (40) + +- ReusedExchange (43) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_date#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(10) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(11) CometFilter +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) + +(12) CometProject +Input [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8], [d_date#8] + +(13) CometBroadcastExchange +Input [1]: [d_date#8] +Arguments: [d_date#8] + +(14) CometBroadcastHashJoin +Left output [2]: [d_date_sk#6, d_date#7] +Right output [1]: [d_date#8] +Arguments: [d_date#7], [d_date#8], LeftSemi, BuildRight + +(15) CometProject +Input [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(17) CometBroadcastHashJoin +Left output [3]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#6], Inner, BuildRight + +(18) CometProject +Input [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, d_date_sk#6] +Arguments: [ss_ext_sales_price#2, i_item_id#5], [ss_ext_sales_price#2, i_item_id#5] + +(19) CometHashAggregate +Input [2]: [ss_ext_sales_price#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(20) CometExchange +Input [2]: [i_item_id#5, sum#12] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [2]: [i_item_id#5, sum#12] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(22) CometFilter +Input [2]: [item_id#13, ss_item_rev#14] +Condition : isnotnull(ss_item_rev#14) + +(23) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#17)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(24) CometFilter +Input [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Condition : isnotnull(cs_item_sk#15) + +(25) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#18, i_item_id#19] + +(26) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Right output [2]: [i_item_sk#18, i_item_id#19] +Arguments: [cs_item_sk#15], [i_item_sk#18], Inner, BuildRight + +(27) CometProject +Input [5]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_sk#18, i_item_id#19] +Arguments: [cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_id#19], [cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_id#19] + +(28) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#20] + +(29) CometBroadcastHashJoin +Left output [3]: [cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_id#19] +Right output [1]: [d_date_sk#20] +Arguments: [cs_sold_date_sk#17], [d_date_sk#20], Inner, BuildRight + +(30) CometProject +Input [4]: [cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_id#19, d_date_sk#20] +Arguments: [cs_ext_sales_price#16, i_item_id#19], [cs_ext_sales_price#16, i_item_id#19] + +(31) CometHashAggregate +Input [2]: [cs_ext_sales_price#16, i_item_id#19] +Keys [1]: [i_item_id#19] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#16))] + +(32) CometExchange +Input [2]: [i_item_id#19, sum#21] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(33) CometHashAggregate +Input [2]: [i_item_id#19, sum#21] +Keys [1]: [i_item_id#19] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#16))] + +(34) CometFilter +Input [2]: [item_id#22, cs_item_rev#23] +Condition : isnotnull(cs_item_rev#23) + +(35) CometBroadcastExchange +Input [2]: [item_id#22, cs_item_rev#23] +Arguments: [item_id#22, cs_item_rev#23] + +(36) CometBroadcastHashJoin +Left output [2]: [item_id#13, ss_item_rev#14] +Right output [2]: [item_id#22, cs_item_rev#23] +Arguments: [item_id#13], [item_id#22], Inner, ((((cast(ss_item_rev#14 as decimal(19,3)) >= (0.9 * cs_item_rev#23)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= (1.1 * cs_item_rev#23))) AND (cast(cs_item_rev#23 as decimal(19,3)) >= (0.9 * ss_item_rev#14))) AND (cast(cs_item_rev#23 as decimal(20,3)) <= (1.1 * ss_item_rev#14))), BuildRight + +(37) CometProject +Input [4]: [item_id#13, ss_item_rev#14, item_id#22, cs_item_rev#23] +Arguments: [item_id#13, ss_item_rev#14, cs_item_rev#23], [item_id#13, ss_item_rev#14, cs_item_rev#23] + +(38) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#26)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(39) CometFilter +Input [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] +Condition : isnotnull(ws_item_sk#24) + +(40) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#27, i_item_id#28] + +(41) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] +Right output [2]: [i_item_sk#27, i_item_id#28] +Arguments: [ws_item_sk#24], [i_item_sk#27], Inner, BuildRight + +(42) CometProject +Input [5]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_sk#27, i_item_id#28] +Arguments: [ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_id#28], [ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_id#28] + +(43) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#29] + +(44) CometBroadcastHashJoin +Left output [3]: [ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_id#28] +Right output [1]: [d_date_sk#29] +Arguments: [ws_sold_date_sk#26], [d_date_sk#29], Inner, BuildRight + +(45) CometProject +Input [4]: [ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_id#28, d_date_sk#29] +Arguments: [ws_ext_sales_price#25, i_item_id#28], [ws_ext_sales_price#25, i_item_id#28] + +(46) CometHashAggregate +Input [2]: [ws_ext_sales_price#25, i_item_id#28] +Keys [1]: [i_item_id#28] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#25))] + +(47) CometExchange +Input [2]: [i_item_id#28, sum#30] +Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(48) CometHashAggregate +Input [2]: [i_item_id#28, sum#30] +Keys [1]: [i_item_id#28] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#25))] + +(49) CometFilter +Input [2]: [item_id#31, ws_item_rev#32] +Condition : isnotnull(ws_item_rev#32) + +(50) CometBroadcastExchange +Input [2]: [item_id#31, ws_item_rev#32] +Arguments: [item_id#31, ws_item_rev#32] + +(51) CometBroadcastHashJoin +Left output [3]: [item_id#13, ss_item_rev#14, cs_item_rev#23] +Right output [2]: [item_id#31, ws_item_rev#32] +Arguments: [item_id#13], [item_id#31], Inner, ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= (0.9 * ws_item_rev#32)) AND (cast(ss_item_rev#14 as decimal(20,3)) <= (1.1 * ws_item_rev#32))) AND (cast(cs_item_rev#23 as decimal(19,3)) >= (0.9 * ws_item_rev#32))) AND (cast(cs_item_rev#23 as decimal(20,3)) <= (1.1 * ws_item_rev#32))) AND (cast(ws_item_rev#32 as decimal(19,3)) >= (0.9 * ss_item_rev#14))) AND (cast(ws_item_rev#32 as decimal(20,3)) <= (1.1 * ss_item_rev#14))) AND (cast(ws_item_rev#32 as decimal(19,3)) >= (0.9 * cs_item_rev#23))) AND (cast(ws_item_rev#32 as decimal(20,3)) <= (1.1 * cs_item_rev#23))), BuildRight + +(52) CometProject +Input [5]: [item_id#13, ss_item_rev#14, cs_item_rev#23, item_id#31, ws_item_rev#32] +Arguments: [item_id#13, ss_item_rev#14, ss_dev#33, cs_item_rev#23, cs_dev#34, ws_item_rev#32, ws_dev#35, average#36], [item_id#13, ss_item_rev#14, (((ss_item_rev#14 / ((ss_item_rev#14 + cs_item_rev#23) + ws_item_rev#32)) / 3) * 100) AS ss_dev#33, cs_item_rev#23, (((cs_item_rev#23 / ((ss_item_rev#14 + cs_item_rev#23) + ws_item_rev#32)) / 3) * 100) AS cs_dev#34, ws_item_rev#32, (((ws_item_rev#32 / ((ss_item_rev#14 + cs_item_rev#23) + ws_item_rev#32)) / 3) * 100) AS ws_dev#35, (((ss_item_rev#14 + cs_item_rev#23) + ws_item_rev#32) / 3) AS average#36] + +(53) CometTakeOrderedAndProject +Input [8]: [item_id#13, ss_item_rev#14, ss_dev#33, cs_item_rev#23, cs_dev#34, ws_item_rev#32, ws_dev#35, average#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[item_id#13 ASC NULLS FIRST,ss_item_rev#14 ASC NULLS FIRST], output=[item_id#13,ss_item_rev#14,ss_dev#33,cs_item_rev#23,cs_dev#34,ws_item_rev#32,ws_dev#35,average#36]), [item_id#13, ss_item_rev#14, ss_dev#33, cs_item_rev#23, cs_dev#34, ws_item_rev#32, ws_dev#35, average#36], 100, [item_id#13 ASC NULLS FIRST, ss_item_rev#14 ASC NULLS FIRST], [item_id#13, ss_item_rev#14, ss_dev#33, cs_item_rev#23, cs_dev#34, ws_item_rev#32, ws_dev#35, average#36] + +(54) ColumnarToRow [codegen id : 1] +Input [8]: [item_id#13, ss_item_rev#14, ss_dev#33, cs_item_rev#23, cs_dev#34, ws_item_rev#32, ws_dev#35, average#36] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 11 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* ColumnarToRow (58) ++- CometProject (57) + +- CometFilter (56) + +- CometScan parquet spark_catalog.default.date_dim (55) + + +(55) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date#37, d_week_seq#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] +ReadSchema: struct + +(56) CometFilter +Input [2]: [d_date#37, d_week_seq#38] +Condition : (isnotnull(d_date#37) AND (d_date#37 = 2000-01-03)) + +(57) CometProject +Input [2]: [d_date#37, d_week_seq#38] +Arguments: [d_week_seq#38], [d_week_seq#38] + +(58) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#38] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ca4f03aa99 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q58.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + CometProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + CometBroadcastHashJoin [item_id,ss_item_rev,cs_item_rev,item_id,ws_item_rev] + CometProject [item_id,ss_item_rev,cs_item_rev] + CometBroadcastHashJoin [item_id,ss_item_rev,item_id,cs_item_rev] + CometFilter [item_id,ss_item_rev] + CometHashAggregate [item_id,ss_item_rev,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id] #2 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometBroadcastHashJoin [d_date_sk,d_date,d_date] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [d_date] #4 + CometProject [d_date] + CometFilter [d_date,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_date,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + CometBroadcastExchange [item_id,cs_item_rev] #5 + CometFilter [item_id,cs_item_rev] + CometHashAggregate [item_id,cs_item_rev,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id] #6 + CometHashAggregate [i_item_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_ext_sales_price,cs_sold_date_sk,i_item_id,d_date_sk] + CometProject [cs_ext_sales_price,cs_sold_date_sk,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_item_id] + CometFilter [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [item_id,ws_item_rev] #7 + CometFilter [item_id,ws_item_rev] + CometHashAggregate [item_id,ws_item_rev,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id] #8 + CometHashAggregate [i_item_id,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ws_ext_sales_price,ws_sold_date_sk,i_item_id,d_date_sk] + CometProject [ws_ext_sales_price,ws_sold_date_sk,i_item_id] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_item_id] + CometFilter [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/explain.txt new file mode 100644 index 0000000000..0235ad1c25 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/explain.txt @@ -0,0 +1,205 @@ +== Physical Plan == +* ColumnarToRow (39) ++- CometTakeOrderedAndProject (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (21) + : +- CometBroadcastHashJoin (20) + : :- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometHashAggregate (10) + : : : +- CometExchange (9) + : : : +- CometHashAggregate (8) + : : : +- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (13) + : : +- CometFilter (12) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (11) + : +- CometBroadcastExchange (19) + : +- CometProject (18) + : +- CometFilter (17) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (16) + +- CometBroadcastExchange (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometHashAggregate (23) + : : +- ReusedExchange (22) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (24) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(5) CometBroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Right output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6], [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] + +(8) CometHashAggregate +Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] + +(9) CometExchange +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] + +(11) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [s_store_sk#14, s_store_id#15, s_store_name#16] + +(12) CometFilter +Input [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Condition : (isnotnull(s_store_sk#14) AND isnotnull(s_store_id#15)) + +(13) CometBroadcastExchange +Input [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [s_store_sk#14, s_store_id#15, s_store_name#16] + +(14) CometBroadcastHashJoin +Left output [9]: [d_week_seq#5, ss_store_sk#1, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23] +Right output [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [ss_store_sk#1], [s_store_sk#14], Inner, BuildRight + +(15) CometProject +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16], [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16] + +(16) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_month_seq#24, d_week_seq#25] +Arguments: [d_month_seq#24, d_week_seq#25] + +(17) CometFilter +Input [2]: [d_month_seq#24, d_week_seq#25] +Condition : (((isnotnull(d_month_seq#24) AND (d_month_seq#24 >= 1212)) AND (d_month_seq#24 <= 1223)) AND isnotnull(d_week_seq#25)) + +(18) CometProject +Input [2]: [d_month_seq#24, d_week_seq#25] +Arguments: [d_week_seq#25], [d_week_seq#25] + +(19) CometBroadcastExchange +Input [1]: [d_week_seq#25] +Arguments: [d_week_seq#25] + +(20) CometBroadcastHashJoin +Left output [10]: [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16] +Right output [1]: [d_week_seq#25] +Arguments: [d_week_seq#5], [d_week_seq#25], Inner, BuildRight + +(21) CometProject +Input [11]: [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16, d_week_seq#25] +Arguments: [s_store_name1#26, d_week_seq1#27, s_store_id1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35], [s_store_name#16 AS s_store_name1#26, d_week_seq#5 AS d_week_seq1#27, s_store_id#15 AS s_store_id1#28, sun_sales#17 AS sun_sales1#29, mon_sales#18 AS mon_sales1#30, tue_sales#19 AS tue_sales1#31, wed_sales#20 AS wed_sales1#32, thu_sales#21 AS thu_sales1#33, fri_sales#22 AS fri_sales1#34, sat_sales#23 AS sat_sales1#35] + +(22) ReusedExchange [Reuses operator id: 9] +Output [9]: [d_week_seq#36, ss_store_sk#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43, sum#44] + +(23) CometHashAggregate +Input [9]: [d_week_seq#36, ss_store_sk#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43, sum#44] +Keys [2]: [d_week_seq#36, ss_store_sk#37] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#45 = Sunday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Monday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Tuesday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Wednesday) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Thursday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Friday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Saturday ) THEN ss_sales_price#46 END))] + +(24) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#47, s_store_id#48] +Arguments: [s_store_sk#47, s_store_id#48] + +(25) CometFilter +Input [2]: [s_store_sk#47, s_store_id#48] +Condition : (isnotnull(s_store_sk#47) AND isnotnull(s_store_id#48)) + +(26) CometBroadcastExchange +Input [2]: [s_store_sk#47, s_store_id#48] +Arguments: [s_store_sk#47, s_store_id#48] + +(27) CometBroadcastHashJoin +Left output [9]: [d_week_seq#36, ss_store_sk#37, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55] +Right output [2]: [s_store_sk#47, s_store_id#48] +Arguments: [ss_store_sk#37], [s_store_sk#47], Inner, BuildRight + +(28) CometProject +Input [11]: [d_week_seq#36, ss_store_sk#37, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_sk#47, s_store_id#48] +Arguments: [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48], [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48] + +(29) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_month_seq#56, d_week_seq#57] +Arguments: [d_month_seq#56, d_week_seq#57] + +(30) CometFilter +Input [2]: [d_month_seq#56, d_week_seq#57] +Condition : (((isnotnull(d_month_seq#56) AND (d_month_seq#56 >= 1224)) AND (d_month_seq#56 <= 1235)) AND isnotnull(d_week_seq#57)) + +(31) CometProject +Input [2]: [d_month_seq#56, d_week_seq#57] +Arguments: [d_week_seq#57], [d_week_seq#57] + +(32) CometBroadcastExchange +Input [1]: [d_week_seq#57] +Arguments: [d_week_seq#57] + +(33) CometBroadcastHashJoin +Left output [9]: [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48] +Right output [1]: [d_week_seq#57] +Arguments: [d_week_seq#36], [d_week_seq#57], Inner, BuildRight + +(34) CometProject +Input [10]: [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48, d_week_seq#57] +Arguments: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66], [d_week_seq#36 AS d_week_seq2#58, s_store_id#48 AS s_store_id2#59, sun_sales#49 AS sun_sales2#60, mon_sales#50 AS mon_sales2#61, tue_sales#51 AS tue_sales2#62, wed_sales#52 AS wed_sales2#63, thu_sales#53 AS thu_sales2#64, fri_sales#54 AS fri_sales2#65, sat_sales#55 AS sat_sales2#66] + +(35) CometBroadcastExchange +Input [9]: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] + +(36) CometBroadcastHashJoin +Left output [10]: [s_store_name1#26, d_week_seq1#27, s_store_id1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35] +Right output [9]: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: [s_store_id1#28, d_week_seq1#27], [s_store_id2#59, (d_week_seq2#58 - 52)], Inner, BuildRight + +(37) CometProject +Input [19]: [s_store_name1#26, d_week_seq1#27, s_store_id1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35, d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73], [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1#29 / sun_sales2#60) AS (sun_sales1 / sun_sales2)#67, (mon_sales1#30 / mon_sales2#61) AS (mon_sales1 / mon_sales2)#68, (tue_sales1#31 / tue_sales2#62) AS (tue_sales1 / tue_sales2)#69, (wed_sales1#32 / wed_sales2#63) AS (wed_sales1 / wed_sales2)#70, (thu_sales1#33 / thu_sales2#64) AS (thu_sales1 / thu_sales2)#71, (fri_sales1#34 / fri_sales2#65) AS (fri_sales1 / fri_sales2)#72, (sat_sales1#35 / sat_sales2#66) AS (sat_sales1 / sat_sales2)#73] + +(38) CometTakeOrderedAndProject +Input [10]: [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#26 ASC NULLS FIRST,s_store_id1#28 ASC NULLS FIRST,d_week_seq1#27 ASC NULLS FIRST], output=[s_store_name1#26,s_store_id1#28,d_week_seq1#27,(sun_sales1 / sun_sales2)#67,(mon_sales1 / mon_sales2)#68,(tue_sales1 / tue_sales2)#69,(wed_sales1 / wed_sales2)#70,(thu_sales1 / thu_sales2)#71,(fri_sales1 / fri_sales2)#72,(sat_sales1 / sat_sales2)#73]), [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73], 100, [s_store_name1#26 ASC NULLS FIRST, s_store_id1#28 ASC NULLS FIRST, d_week_seq1#27 ASC NULLS FIRST], [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73] + +(39) ColumnarToRow [codegen id : 1] +Input [10]: [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e95d05323f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_datafusion/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + CometProject [sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + CometBroadcastHashJoin [s_store_name1,d_week_seq1,s_store_id1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1,d_week_seq2,s_store_id2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometProject [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [s_store_name1,d_week_seq1,s_store_id1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name,d_week_seq] + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] + CometBroadcastHashJoin [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_sk,s_store_id,s_store_name] + CometHashAggregate [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END))] + CometExchange [d_week_seq,ss_store_sk] #1 + CometHashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,d_day_name,ss_sales_price] + CometProject [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + CometBroadcastHashJoin [ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_week_seq,d_day_name] + CometFilter [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_week_seq,d_day_name] #2 + CometFilter [d_date_sk,d_week_seq,d_day_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq,d_day_name] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #3 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name] + CometBroadcastExchange [d_week_seq] #4 + CometProject [d_week_seq] + CometFilter [d_month_seq,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_week_seq] + CometBroadcastExchange [d_week_seq2,s_store_id2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] #5 + CometProject [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [d_week_seq2,s_store_id2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,d_week_seq] + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] + CometBroadcastHashJoin [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_sk,s_store_id] + CometHashAggregate [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END))] + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + CometBroadcastExchange [s_store_sk,s_store_id] #6 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [d_week_seq] #7 + CometProject [d_week_seq] + CometFilter [d_month_seq,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..62311fb338 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/explain.txt @@ -0,0 +1,224 @@ +== Physical Plan == +* ColumnarToRow (39) ++- CometTakeOrderedAndProject (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (21) + : +- CometBroadcastHashJoin (20) + : :- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometHashAggregate (10) + : : : +- CometExchange (9) + : : : +- CometHashAggregate (8) + : : : +- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (13) + : : +- CometFilter (12) + : : +- CometScan parquet spark_catalog.default.store (11) + : +- CometBroadcastExchange (19) + : +- CometProject (18) + : +- CometFilter (17) + : +- CometScan parquet spark_catalog.default.date_dim (16) + +- CometBroadcastExchange (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometHashAggregate (23) + : : +- ReusedExchange (22) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometScan parquet spark_catalog.default.store (24) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometScan parquet spark_catalog.default.date_dim (29) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(5) CometBroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Right output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6], [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] + +(8) CometHashAggregate +Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] + +(9) CometExchange +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] + +(11) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(12) CometFilter +Input [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Condition : (isnotnull(s_store_sk#14) AND isnotnull(s_store_id#15)) + +(13) CometBroadcastExchange +Input [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [s_store_sk#14, s_store_id#15, s_store_name#16] + +(14) CometBroadcastHashJoin +Left output [9]: [d_week_seq#5, ss_store_sk#1, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23] +Right output [3]: [s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [ss_store_sk#1], [s_store_sk#14], Inner, BuildRight + +(15) CometProject +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_sk#14, s_store_id#15, s_store_name#16] +Arguments: [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16], [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16] + +(16) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_month_seq#24, d_week_seq#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [d_month_seq#24, d_week_seq#25] +Condition : (((isnotnull(d_month_seq#24) AND (d_month_seq#24 >= 1212)) AND (d_month_seq#24 <= 1223)) AND isnotnull(d_week_seq#25)) + +(18) CometProject +Input [2]: [d_month_seq#24, d_week_seq#25] +Arguments: [d_week_seq#25], [d_week_seq#25] + +(19) CometBroadcastExchange +Input [1]: [d_week_seq#25] +Arguments: [d_week_seq#25] + +(20) CometBroadcastHashJoin +Left output [10]: [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16] +Right output [1]: [d_week_seq#25] +Arguments: [d_week_seq#5], [d_week_seq#25], Inner, BuildRight + +(21) CometProject +Input [11]: [d_week_seq#5, sun_sales#17, mon_sales#18, tue_sales#19, wed_sales#20, thu_sales#21, fri_sales#22, sat_sales#23, s_store_id#15, s_store_name#16, d_week_seq#25] +Arguments: [s_store_name1#26, d_week_seq1#27, s_store_id1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35], [s_store_name#16 AS s_store_name1#26, d_week_seq#5 AS d_week_seq1#27, s_store_id#15 AS s_store_id1#28, sun_sales#17 AS sun_sales1#29, mon_sales#18 AS mon_sales1#30, tue_sales#19 AS tue_sales1#31, wed_sales#20 AS wed_sales1#32, thu_sales#21 AS thu_sales1#33, fri_sales#22 AS fri_sales1#34, sat_sales#23 AS sat_sales1#35] + +(22) ReusedExchange [Reuses operator id: 9] +Output [9]: [d_week_seq#36, ss_store_sk#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43, sum#44] + +(23) CometHashAggregate +Input [9]: [d_week_seq#36, ss_store_sk#37, sum#38, sum#39, sum#40, sum#41, sum#42, sum#43, sum#44] +Keys [2]: [d_week_seq#36, ss_store_sk#37] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#45 = Sunday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Monday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Tuesday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Wednesday) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Thursday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Friday ) THEN ss_sales_price#46 END)), sum(UnscaledValue(CASE WHEN (d_day_name#45 = Saturday ) THEN ss_sales_price#46 END))] + +(24) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#47, s_store_id#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(25) CometFilter +Input [2]: [s_store_sk#47, s_store_id#48] +Condition : (isnotnull(s_store_sk#47) AND isnotnull(s_store_id#48)) + +(26) CometBroadcastExchange +Input [2]: [s_store_sk#47, s_store_id#48] +Arguments: [s_store_sk#47, s_store_id#48] + +(27) CometBroadcastHashJoin +Left output [9]: [d_week_seq#36, ss_store_sk#37, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55] +Right output [2]: [s_store_sk#47, s_store_id#48] +Arguments: [ss_store_sk#37], [s_store_sk#47], Inner, BuildRight + +(28) CometProject +Input [11]: [d_week_seq#36, ss_store_sk#37, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_sk#47, s_store_id#48] +Arguments: [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48], [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48] + +(29) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_month_seq#56, d_week_seq#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [d_month_seq#56, d_week_seq#57] +Condition : (((isnotnull(d_month_seq#56) AND (d_month_seq#56 >= 1224)) AND (d_month_seq#56 <= 1235)) AND isnotnull(d_week_seq#57)) + +(31) CometProject +Input [2]: [d_month_seq#56, d_week_seq#57] +Arguments: [d_week_seq#57], [d_week_seq#57] + +(32) CometBroadcastExchange +Input [1]: [d_week_seq#57] +Arguments: [d_week_seq#57] + +(33) CometBroadcastHashJoin +Left output [9]: [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48] +Right output [1]: [d_week_seq#57] +Arguments: [d_week_seq#36], [d_week_seq#57], Inner, BuildRight + +(34) CometProject +Input [10]: [d_week_seq#36, sun_sales#49, mon_sales#50, tue_sales#51, wed_sales#52, thu_sales#53, fri_sales#54, sat_sales#55, s_store_id#48, d_week_seq#57] +Arguments: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66], [d_week_seq#36 AS d_week_seq2#58, s_store_id#48 AS s_store_id2#59, sun_sales#49 AS sun_sales2#60, mon_sales#50 AS mon_sales2#61, tue_sales#51 AS tue_sales2#62, wed_sales#52 AS wed_sales2#63, thu_sales#53 AS thu_sales2#64, fri_sales#54 AS fri_sales2#65, sat_sales#55 AS sat_sales2#66] + +(35) CometBroadcastExchange +Input [9]: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] + +(36) CometBroadcastHashJoin +Left output [10]: [s_store_name1#26, d_week_seq1#27, s_store_id1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35] +Right output [9]: [d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: [s_store_id1#28, d_week_seq1#27], [s_store_id2#59, (d_week_seq2#58 - 52)], Inner, BuildRight + +(37) CometProject +Input [19]: [s_store_name1#26, d_week_seq1#27, s_store_id1#28, sun_sales1#29, mon_sales1#30, tue_sales1#31, wed_sales1#32, thu_sales1#33, fri_sales1#34, sat_sales1#35, d_week_seq2#58, s_store_id2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73], [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1#29 / sun_sales2#60) AS (sun_sales1 / sun_sales2)#67, (mon_sales1#30 / mon_sales2#61) AS (mon_sales1 / mon_sales2)#68, (tue_sales1#31 / tue_sales2#62) AS (tue_sales1 / tue_sales2)#69, (wed_sales1#32 / wed_sales2#63) AS (wed_sales1 / wed_sales2)#70, (thu_sales1#33 / thu_sales2#64) AS (thu_sales1 / thu_sales2)#71, (fri_sales1#34 / fri_sales2#65) AS (fri_sales1 / fri_sales2)#72, (sat_sales1#35 / sat_sales2#66) AS (sat_sales1 / sat_sales2)#73] + +(38) CometTakeOrderedAndProject +Input [10]: [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#26 ASC NULLS FIRST,s_store_id1#28 ASC NULLS FIRST,d_week_seq1#27 ASC NULLS FIRST], output=[s_store_name1#26,s_store_id1#28,d_week_seq1#27,(sun_sales1 / sun_sales2)#67,(mon_sales1 / mon_sales2)#68,(tue_sales1 / tue_sales2)#69,(wed_sales1 / wed_sales2)#70,(thu_sales1 / thu_sales2)#71,(fri_sales1 / fri_sales2)#72,(sat_sales1 / sat_sales2)#73]), [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73], 100, [s_store_name1#26 ASC NULLS FIRST, s_store_id1#28 ASC NULLS FIRST, d_week_seq1#27 ASC NULLS FIRST], [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73] + +(39) ColumnarToRow [codegen id : 1] +Input [10]: [s_store_name1#26, s_store_id1#28, d_week_seq1#27, (sun_sales1 / sun_sales2)#67, (mon_sales1 / mon_sales2)#68, (tue_sales1 / tue_sales2)#69, (wed_sales1 / wed_sales2)#70, (thu_sales1 / thu_sales2)#71, (fri_sales1 / fri_sales2)#72, (sat_sales1 / sat_sales2)#73] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..140a764391 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q59.native_iceberg_compat/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + CometProject [sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + CometBroadcastHashJoin [s_store_name1,d_week_seq1,s_store_id1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1,d_week_seq2,s_store_id2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometProject [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [s_store_name1,d_week_seq1,s_store_id1,sun_sales1,mon_sales1,tue_sales1,wed_sales1,thu_sales1,fri_sales1,sat_sales1] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name,d_week_seq] + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] + CometBroadcastHashJoin [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_sk,s_store_id,s_store_name] + CometHashAggregate [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END))] + CometExchange [d_week_seq,ss_store_sk] #1 + CometHashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,d_day_name,ss_sales_price] + CometProject [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + CometBroadcastHashJoin [ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_week_seq,d_day_name] + CometFilter [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_week_seq,d_day_name] #2 + CometFilter [d_date_sk,d_week_seq,d_day_name] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq,d_day_name] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #3 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + CometBroadcastExchange [d_week_seq] #4 + CometProject [d_week_seq] + CometFilter [d_month_seq,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_month_seq,d_week_seq] + CometBroadcastExchange [d_week_seq2,s_store_id2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] #5 + CometProject [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] [d_week_seq2,s_store_id2,sun_sales2,mon_sales2,tue_sales2,wed_sales2,thu_sales2,fri_sales2,sat_sales2] + CometBroadcastHashJoin [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,d_week_seq] + CometProject [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] + CometBroadcastHashJoin [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_sk,s_store_id] + CometHashAggregate [d_week_seq,ss_store_sk,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END))] + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + CometBroadcastExchange [s_store_sk,s_store_id] #6 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + CometBroadcastExchange [d_week_seq] #7 + CometProject [d_week_seq] + CometFilter [d_month_seq,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_month_seq,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/explain.txt new file mode 100644 index 0000000000..36540b4276 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometFilter (36) + +- CometHashAggregate (35) + +- CometExchange (34) + +- CometHashAggregate (33) + +- CometProject (32) + +- CometBroadcastHashJoin (31) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometHashAggregate (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometFilter (22) + +- CometNativeScan: `spark_catalog`.`default`.`item` (21) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#1, ca_state#2] +Arguments: [ca_address_sk#1, ca_state#2] + +(2) CometFilter +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [c_customer_sk#3, c_current_addr_sk#4] + +(4) CometFilter +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [c_customer_sk#3, c_current_addr_sk#4] + +(6) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#1, ca_state#2] +Right output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_address_sk#1], [c_current_addr_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_state#2, c_customer_sk#3], [ca_state#2, c_customer_sk#3] + +(8) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(9) CometFilter +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(10) CometBroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(11) CometBroadcastHashJoin +Left output [2]: [ca_state#2, c_customer_sk#3] +Right output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], Inner, BuildRight + +(12) CometProject +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7], [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#8, d_month_seq#9] +Arguments: [d_date_sk#8, d_month_seq#9] + +(14) CometFilter +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [2]: [d_date_sk#8, d_month_seq#9] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(17) CometBroadcastHashJoin +Left output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ca_state#2, ss_item_sk#5], [ca_state#2, ss_item_sk#5] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Arguments: [i_item_sk#12, i_current_price#13, i_category#14] + +(20) CometFilter +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(21) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_current_price#15, i_category#16] +Arguments: [i_current_price#15, i_category#16] + +(22) CometFilter +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(23) CometHashAggregate +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] + +(24) CometExchange +Input [3]: [i_category#16, sum#17, count#18] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [3]: [i_category#16, sum#17, count#18] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] + +(26) CometFilter +Input [2]: [avg(i_current_price)#19, i_category#16] +Condition : isnotnull(avg(i_current_price)#19) + +(27) CometBroadcastExchange +Input [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [avg(i_current_price)#19, i_category#16] + +(28) CometBroadcastHashJoin +Left output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Right output [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [i_category#14], [i_category#16], Inner, (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#19)), BuildRight + +(29) CometProject +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#19, i_category#16] +Arguments: [i_item_sk#12], [i_item_sk#12] + +(30) CometBroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: [i_item_sk#12] + +(31) CometBroadcastHashJoin +Left output [2]: [ca_state#2, ss_item_sk#5] +Right output [1]: [i_item_sk#12] +Arguments: [ss_item_sk#5], [i_item_sk#12], Inner, BuildRight + +(32) CometProject +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] +Arguments: [ca_state#2], [ca_state#2] + +(33) CometHashAggregate +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] + +(34) CometExchange +Input [2]: [ca_state#2, count#20] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(35) CometHashAggregate +Input [2]: [ca_state#2, count#20] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] + +(36) CometFilter +Input [2]: [state#21, cnt#22] +Condition : (cnt#22 >= 10) + +(37) CometTakeOrderedAndProject +Input [2]: [state#21, cnt#22] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[cnt#22 ASC NULLS FIRST], output=[state#21,cnt#22]), [state#21, cnt#22], 100, [cnt#22 ASC NULLS FIRST], [state#21, cnt#22] + +(38) ColumnarToRow [codegen id : 1] +Input [2]: [state#21, cnt#22] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 14 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* ColumnarToRow (45) ++- CometHashAggregate (44) + +- CometExchange (43) + +- CometHashAggregate (42) + +- CometProject (41) + +- CometFilter (40) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (39) + + +(39) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_month_seq#23, d_year#24, d_moy#25] +Arguments: [d_month_seq#23, d_year#24, d_moy#25] + +(40) CometFilter +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Condition : (((isnotnull(d_year#24) AND isnotnull(d_moy#25)) AND (d_year#24 = 2000)) AND (d_moy#25 = 1)) + +(41) CometProject +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Arguments: [d_month_seq#23], [d_month_seq#23] + +(42) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(43) CometExchange +Input [1]: [d_month_seq#23] +Arguments: hashpartitioning(d_month_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(45) ColumnarToRow [codegen id : 1] +Input [1]: [d_month_seq#23] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4ea38af90f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [state,cnt] + CometFilter [state,cnt] + CometHashAggregate [state,cnt,ca_state,count,count(1)] + CometExchange [ca_state] #1 + CometHashAggregate [ca_state,count] + CometProject [ca_state] + CometBroadcastHashJoin [ca_state,ss_item_sk,i_item_sk] + CometProject [ca_state,ss_item_sk] + CometBroadcastHashJoin [ca_state,ss_item_sk,ss_sold_date_sk,d_date_sk] + CometProject [ca_state,ss_item_sk,ss_sold_date_sk] + CometBroadcastHashJoin [ca_state,c_customer_sk,ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometProject [ca_state,c_customer_sk] + CometBroadcastHashJoin [ca_address_sk,ca_state,c_customer_sk,c_current_addr_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [d_month_seq] + CometExchange [d_month_seq] #5 + CometHashAggregate [d_month_seq] + CometProject [d_month_seq] + CometFilter [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk] #6 + CometProject [i_item_sk] + CometBroadcastHashJoin [i_item_sk,i_current_price,i_category,avg(i_current_price),i_category] + CometFilter [i_item_sk,i_current_price,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_category] + CometBroadcastExchange [avg(i_current_price),i_category] #7 + CometFilter [avg(i_current_price),i_category] + CometHashAggregate [avg(i_current_price),i_category,sum,count,avg(UnscaledValue(i_current_price))] + CometExchange [i_category] #8 + CometHashAggregate [i_category,sum,count,i_current_price] + CometFilter [i_current_price,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0443fbeb23 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/explain.txt @@ -0,0 +1,265 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometFilter (36) + +- CometHashAggregate (35) + +- CometExchange (34) + +- CometHashAggregate (33) + +- CometProject (32) + +- CometBroadcastHashJoin (31) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.customer_address (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.customer (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.store_sales (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.date_dim (13) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (20) + : +- CometScan parquet spark_catalog.default.item (19) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometHashAggregate (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometFilter (22) + +- CometScan parquet spark_catalog.default.item (21) + + +(1) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) CometFilter +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(3) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [c_customer_sk#3, c_current_addr_sk#4] + +(6) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#1, ca_state#2] +Right output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_address_sk#1], [c_current_addr_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_state#2, c_customer_sk#3], [ca_state#2, c_customer_sk#3] + +(8) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(10) CometBroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(11) CometBroadcastHashJoin +Left output [2]: [ca_state#2, c_customer_sk#3] +Right output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], Inner, BuildRight + +(12) CometProject +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7], [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_month_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [2]: [d_date_sk#8, d_month_seq#9] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(17) CometBroadcastHashJoin +Left output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ca_state#2, ss_item_sk#5], [ca_state#2, ss_item_sk#5] + +(19) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_category), IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(21) CometScan parquet spark_catalog.default.item +Output [2]: [i_current_price#15, i_category#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(22) CometFilter +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(23) CometHashAggregate +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] + +(24) CometExchange +Input [3]: [i_category#16, sum#17, count#18] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [3]: [i_category#16, sum#17, count#18] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] + +(26) CometFilter +Input [2]: [avg(i_current_price)#19, i_category#16] +Condition : isnotnull(avg(i_current_price)#19) + +(27) CometBroadcastExchange +Input [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [avg(i_current_price)#19, i_category#16] + +(28) CometBroadcastHashJoin +Left output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Right output [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [i_category#14], [i_category#16], Inner, (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#19)), BuildRight + +(29) CometProject +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#19, i_category#16] +Arguments: [i_item_sk#12], [i_item_sk#12] + +(30) CometBroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: [i_item_sk#12] + +(31) CometBroadcastHashJoin +Left output [2]: [ca_state#2, ss_item_sk#5] +Right output [1]: [i_item_sk#12] +Arguments: [ss_item_sk#5], [i_item_sk#12], Inner, BuildRight + +(32) CometProject +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] +Arguments: [ca_state#2], [ca_state#2] + +(33) CometHashAggregate +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] + +(34) CometExchange +Input [2]: [ca_state#2, count#20] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(35) CometHashAggregate +Input [2]: [ca_state#2, count#20] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] + +(36) CometFilter +Input [2]: [state#21, cnt#22] +Condition : (cnt#22 >= 10) + +(37) CometTakeOrderedAndProject +Input [2]: [state#21, cnt#22] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[cnt#22 ASC NULLS FIRST], output=[state#21,cnt#22]), [state#21, cnt#22], 100, [cnt#22 ASC NULLS FIRST], [state#21, cnt#22] + +(38) ColumnarToRow [codegen id : 1] +Input [2]: [state#21, cnt#22] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 14 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* ColumnarToRow (45) ++- CometHashAggregate (44) + +- CometExchange (43) + +- CometHashAggregate (42) + +- CometProject (41) + +- CometFilter (40) + +- CometScan parquet spark_catalog.default.date_dim (39) + + +(39) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#23, d_year#24, d_moy#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(40) CometFilter +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Condition : (((isnotnull(d_year#24) AND isnotnull(d_moy#25)) AND (d_year#24 = 2000)) AND (d_moy#25 = 1)) + +(41) CometProject +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Arguments: [d_month_seq#23], [d_month_seq#23] + +(42) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(43) CometExchange +Input [1]: [d_month_seq#23] +Arguments: hashpartitioning(d_month_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(45) ColumnarToRow [codegen id : 1] +Input [1]: [d_month_seq#23] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..69ab33f6e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q6.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [state,cnt] + CometFilter [state,cnt] + CometHashAggregate [state,cnt,ca_state,count,count(1)] + CometExchange [ca_state] #1 + CometHashAggregate [ca_state,count] + CometProject [ca_state] + CometBroadcastHashJoin [ca_state,ss_item_sk,i_item_sk] + CometProject [ca_state,ss_item_sk] + CometBroadcastHashJoin [ca_state,ss_item_sk,ss_sold_date_sk,d_date_sk] + CometProject [ca_state,ss_item_sk,ss_sold_date_sk] + CometBroadcastHashJoin [ca_state,c_customer_sk,ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometProject [ca_state,c_customer_sk] + CometBroadcastHashJoin [ca_address_sk,ca_state,c_customer_sk,c_current_addr_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [d_month_seq] + CometExchange [d_month_seq] #5 + CometHashAggregate [d_month_seq] + CometProject [d_month_seq] + CometFilter [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk] #6 + CometProject [i_item_sk] + CometBroadcastHashJoin [i_item_sk,i_current_price,i_category,avg(i_current_price),i_category] + CometFilter [i_item_sk,i_current_price,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_category] + CometBroadcastExchange [avg(i_current_price),i_category] #7 + CometFilter [avg(i_current_price),i_category] + CometHashAggregate [avg(i_current_price),i_category,sum,count,avg(UnscaledValue(i_current_price))] + CometExchange [i_category] #8 + CometHashAggregate [i_category,sum,count,i_current_price] + CometFilter [i_current_price,i_category] + CometScan parquet spark_catalog.default.item [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/explain.txt new file mode 100644 index 0000000000..e5ada56e73 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/explain.txt @@ -0,0 +1,258 @@ +== Physical Plan == +* ColumnarToRow (49) ++- CometTakeOrderedAndProject (48) + +- CometHashAggregate (47) + +- CometExchange (46) + +- CometHashAggregate (45) + +- CometUnion (44) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- CometHashAggregate (43) + +- ReusedExchange (42) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5, d_year#6, d_moy#7] + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8, ca_gmt_offset#9] + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(16) CometFilter +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_id#12, i_category#13] +Arguments: [i_item_id#12, i_category#13] + +(18) CometFilter +Input [2]: [i_item_id#12, i_category#13] +Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) + +(19) CometProject +Input [2]: [i_item_id#12, i_category#13] +Arguments: [i_item_id#12], [i_item_id#12] + +(20) CometBroadcastExchange +Input [1]: [i_item_id#12] +Arguments: [i_item_id#12] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_item_id#11] +Right output [1]: [i_item_id#12] +Arguments: [i_item_id#11], [i_item_id#12], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Arguments: [ss_ext_sales_price#3, i_item_id#11], [ss_ext_sales_price#3, i_item_id#11] + +(25) CometHashAggregate +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(26) CometExchange +Input [2]: [i_item_id#11, sum#14] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [i_item_id#11, sum#14] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] + +(28) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(29) CometFilter +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_bill_addr_sk#15) AND isnotnull(cs_item_sk#16)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(31) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(32) CometProject +Input [5]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17], [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#20] + +(34) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] +Right output [1]: [ca_address_sk#20] +Arguments: [cs_bill_addr_sk#15], [ca_address_sk#20], Inner, BuildRight + +(35) CometProject +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, ca_address_sk#20] +Arguments: [cs_item_sk#16, cs_ext_sales_price#17], [cs_item_sk#16, cs_ext_sales_price#17] + +(36) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#21, i_item_id#22] + +(37) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#16, cs_ext_sales_price#17] +Right output [2]: [i_item_sk#21, i_item_id#22] +Arguments: [cs_item_sk#16], [i_item_sk#21], Inner, BuildRight + +(38) CometProject +Input [4]: [cs_item_sk#16, cs_ext_sales_price#17, i_item_sk#21, i_item_id#22] +Arguments: [cs_ext_sales_price#17, i_item_id#22], [cs_ext_sales_price#17, i_item_id#22] + +(39) CometHashAggregate +Input [2]: [cs_ext_sales_price#17, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#17))] + +(40) CometExchange +Input [2]: [i_item_id#22, sum#23] +Arguments: hashpartitioning(i_item_id#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(41) CometHashAggregate +Input [2]: [i_item_id#22, sum#23] +Keys [1]: [i_item_id#22] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(42) ReusedExchange [Reuses operator id: 26] +Output [2]: [i_item_id#24, sum#25] + +(43) CometHashAggregate +Input [2]: [i_item_id#24, sum#25] +Keys [1]: [i_item_id#24] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#26))] + +(44) CometUnion +Child 0 Input [2]: [i_item_id#11, total_sales#27] +Child 1 Input [2]: [i_item_id#22, total_sales#28] +Child 2 Input [2]: [i_item_id#24, total_sales#29] + +(45) CometHashAggregate +Input [2]: [i_item_id#11, total_sales#27] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#27)] + +(46) CometExchange +Input [3]: [i_item_id#11, sum#30, isEmpty#31] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(47) CometHashAggregate +Input [3]: [i_item_id#11, sum#30, isEmpty#31] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#27)] + +(48) CometTakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#32] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#11 ASC NULLS FIRST,total_sales#32 ASC NULLS FIRST], output=[i_item_id#11,total_sales#32]), [i_item_id#11, total_sales#32], 100, [i_item_id#11 ASC NULLS FIRST, total_sales#32 ASC NULLS FIRST], [i_item_id#11, total_sales#32] + +(49) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_id#11, total_sales#32] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5bc8aaae31 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_datafusion/simplified.txt @@ -0,0 +1,51 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,isEmpty,sum(total_sales)] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,isEmpty,total_sales] + CometUnion [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id] #2 + CometHashAggregate [i_item_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_id] + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [i_item_id] #6 + CometProject [i_item_id] + CometFilter [i_item_id,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_id,i_category] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id] #7 + CometHashAggregate [i_item_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ws_ext_sales_price))] + ReusedExchange [i_item_id,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8b1561ea41 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/explain.txt @@ -0,0 +1,344 @@ +== Physical Plan == +* ColumnarToRow (61) ++- CometTakeOrderedAndProject (60) + +- CometHashAggregate (59) + +- CometExchange (58) + +- CometHashAggregate (57) + +- CometUnion (56) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.customer_address (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometScan parquet spark_catalog.default.item (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometScan parquet spark_catalog.default.item (17) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- CometHashAggregate (55) + +- CometExchange (54) + +- CometHashAggregate (53) + +- CometProject (52) + +- CometBroadcastHashJoin (51) + :- CometProject (49) + : +- CometBroadcastHashJoin (48) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometFilter (43) + : : : +- CometScan parquet spark_catalog.default.web_sales (42) + : : +- ReusedExchange (44) + : +- ReusedExchange (47) + +- ReusedExchange (50) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_id#12, i_category#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music )] +ReadSchema: struct + +(18) CometFilter +Input [2]: [i_item_id#12, i_category#13] +Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) + +(19) CometProject +Input [2]: [i_item_id#12, i_category#13] +Arguments: [i_item_id#12], [i_item_id#12] + +(20) CometBroadcastExchange +Input [1]: [i_item_id#12] +Arguments: [i_item_id#12] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_item_id#11] +Right output [1]: [i_item_id#12] +Arguments: [i_item_id#11], [i_item_id#12], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Arguments: [ss_ext_sales_price#3, i_item_id#11], [ss_ext_sales_price#3, i_item_id#11] + +(25) CometHashAggregate +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(26) CometExchange +Input [2]: [i_item_id#11, sum#14] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [i_item_id#11, sum#14] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] + +(28) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#18)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(29) CometFilter +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_bill_addr_sk#15) AND isnotnull(cs_item_sk#16)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(31) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(32) CometProject +Input [5]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17], [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#20] + +(34) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17] +Right output [1]: [ca_address_sk#20] +Arguments: [cs_bill_addr_sk#15], [ca_address_sk#20], Inner, BuildRight + +(35) CometProject +Input [4]: [cs_bill_addr_sk#15, cs_item_sk#16, cs_ext_sales_price#17, ca_address_sk#20] +Arguments: [cs_item_sk#16, cs_ext_sales_price#17], [cs_item_sk#16, cs_ext_sales_price#17] + +(36) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#21, i_item_id#22] + +(37) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#16, cs_ext_sales_price#17] +Right output [2]: [i_item_sk#21, i_item_id#22] +Arguments: [cs_item_sk#16], [i_item_sk#21], Inner, BuildRight + +(38) CometProject +Input [4]: [cs_item_sk#16, cs_ext_sales_price#17, i_item_sk#21, i_item_id#22] +Arguments: [cs_ext_sales_price#17, i_item_id#22], [cs_ext_sales_price#17, i_item_id#22] + +(39) CometHashAggregate +Input [2]: [cs_ext_sales_price#17, i_item_id#22] +Keys [1]: [i_item_id#22] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#17))] + +(40) CometExchange +Input [2]: [i_item_id#22, sum#23] +Arguments: hashpartitioning(i_item_id#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(41) CometHashAggregate +Input [2]: [i_item_id#22, sum#23] +Keys [1]: [i_item_id#22] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#17))] + +(42) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#27)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(43) CometFilter +Input [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Condition : (isnotnull(ws_bill_addr_sk#25) AND isnotnull(ws_item_sk#24)) + +(44) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#28] + +(45) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27] +Right output [1]: [d_date_sk#28] +Arguments: [ws_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight + +(46) CometProject +Input [5]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ws_sold_date_sk#27, d_date_sk#28] +Arguments: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26], [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26] + +(47) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#29] + +(48) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26] +Right output [1]: [ca_address_sk#29] +Arguments: [ws_bill_addr_sk#25], [ca_address_sk#29], Inner, BuildRight + +(49) CometProject +Input [4]: [ws_item_sk#24, ws_bill_addr_sk#25, ws_ext_sales_price#26, ca_address_sk#29] +Arguments: [ws_item_sk#24, ws_ext_sales_price#26], [ws_item_sk#24, ws_ext_sales_price#26] + +(50) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#30, i_item_id#31] + +(51) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#24, ws_ext_sales_price#26] +Right output [2]: [i_item_sk#30, i_item_id#31] +Arguments: [ws_item_sk#24], [i_item_sk#30], Inner, BuildRight + +(52) CometProject +Input [4]: [ws_item_sk#24, ws_ext_sales_price#26, i_item_sk#30, i_item_id#31] +Arguments: [ws_ext_sales_price#26, i_item_id#31], [ws_ext_sales_price#26, i_item_id#31] + +(53) CometHashAggregate +Input [2]: [ws_ext_sales_price#26, i_item_id#31] +Keys [1]: [i_item_id#31] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#26))] + +(54) CometExchange +Input [2]: [i_item_id#31, sum#32] +Arguments: hashpartitioning(i_item_id#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(55) CometHashAggregate +Input [2]: [i_item_id#31, sum#32] +Keys [1]: [i_item_id#31] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#26))] + +(56) CometUnion +Child 0 Input [2]: [i_item_id#11, total_sales#33] +Child 1 Input [2]: [i_item_id#22, total_sales#34] +Child 2 Input [2]: [i_item_id#31, total_sales#35] + +(57) CometHashAggregate +Input [2]: [i_item_id#11, total_sales#33] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#33)] + +(58) CometExchange +Input [3]: [i_item_id#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [3]: [i_item_id#11, sum#36, isEmpty#37] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#33)] + +(60) CometTakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#38] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#11 ASC NULLS FIRST,total_sales#38 ASC NULLS FIRST], output=[i_item_id#11,total_sales#38]), [i_item_id#11, total_sales#38], 100, [i_item_id#11 ASC NULLS FIRST, total_sales#38 ASC NULLS FIRST], [i_item_id#11, total_sales#38] + +(61) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_id#11, total_sales#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f16c9e0a62 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q60.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,isEmpty,sum(total_sales)] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,isEmpty,total_sales] + CometUnion [i_item_id,total_sales] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id] #2 + CometHashAggregate [i_item_id,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_id] + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometBroadcastExchange [i_item_id] #6 + CometProject [i_item_id] + CometFilter [i_item_id,i_category] + CometScan parquet spark_catalog.default.item [i_item_id,i_category] + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id] #7 + CometHashAggregate [i_item_id,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 + CometHashAggregate [i_item_id,total_sales,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id] #8 + CometHashAggregate [i_item_id,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,i_item_sk,i_item_id] + CometProject [ws_item_sk,ws_ext_sales_price] + CometBroadcastHashJoin [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ca_address_sk] + CometProject [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + CometBroadcastHashJoin [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/explain.txt new file mode 100644 index 0000000000..75c3a8e9c6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/explain.txt @@ -0,0 +1,320 @@ +== Physical Plan == +* Project (62) ++- * BroadcastNestedLoopJoin Inner BuildRight (61) + :- * ColumnarToRow (38) + : +- CometHashAggregate (37) + : +- CometExchange (36) + : +- CometHashAggregate (35) + : +- CometProject (34) + : +- CometBroadcastHashJoin (33) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometProject (25) + : : : +- CometBroadcastHashJoin (24) + : : : :- CometProject (20) + : : : : +- CometBroadcastHashJoin (19) + : : : : :- CometProject (14) + : : : : : +- CometBroadcastHashJoin (13) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (6) + : : : : : : +- CometProject (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (3) + : : : : : +- CometBroadcastExchange (12) + : : : : : +- CometProject (11) + : : : : : +- CometFilter (10) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (9) + : : : : +- CometBroadcastExchange (18) + : : : : +- CometProject (17) + : : : : +- CometFilter (16) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (15) + : : : +- CometBroadcastExchange (23) + : : : +- CometFilter (22) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (21) + : : +- ReusedExchange (26) + : +- CometBroadcastExchange (32) + : +- CometProject (31) + : +- CometFilter (30) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (29) + +- BroadcastExchange (60) + +- * ColumnarToRow (59) + +- CometHashAggregate (58) + +- CometExchange (57) + +- CometHashAggregate (56) + +- CometProject (55) + +- CometBroadcastHashJoin (54) + :- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometProject (49) + : : +- CometBroadcastHashJoin (48) + : : :- CometProject (46) + : : : +- CometBroadcastHashJoin (45) + : : : :- CometProject (43) + : : : : +- CometBroadcastHashJoin (42) + : : : : :- CometFilter (40) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (39) + : : : : +- ReusedExchange (41) + : : : +- ReusedExchange (44) + : : +- ReusedExchange (47) + : +- ReusedExchange (50) + +- ReusedExchange (53) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_store_sk#3) AND isnotnull(ss_promo_sk#4)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Arguments: [s_store_sk#7, s_gmt_offset#8] + +(4) CometFilter +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(5) CometProject +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Arguments: [s_store_sk#7], [s_store_sk#7] + +(6) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(7) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#3], [s_store_sk#7], Inner, BuildRight + +(8) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, s_store_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6], [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(9) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Arguments: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] + +(10) CometFilter +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Condition : ((((p_channel_dmail#10 = Y) OR (p_channel_email#11 = Y)) OR (p_channel_tv#12 = Y)) AND isnotnull(p_promo_sk#9)) + +(11) CometProject +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Arguments: [p_promo_sk#9], [p_promo_sk#9] + +(12) CometBroadcastExchange +Input [1]: [p_promo_sk#9] +Arguments: [p_promo_sk#9] + +(13) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [p_promo_sk#9] +Arguments: [ss_promo_sk#4], [p_promo_sk#9], Inner, BuildRight + +(14) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6], [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(15) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +Arguments: [d_date_sk#13, d_year#14, d_moy#15] + +(16) CometFilter +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((((isnotnull(d_year#14) AND isnotnull(d_moy#15)) AND (d_year#14 = 1998)) AND (d_moy#15 = 11)) AND isnotnull(d_date_sk#13)) + +(17) CometProject +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(18) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(19) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#6], [d_date_sk#13], Inner, BuildRight + +(20) CometProject +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5], [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] + +(21) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [c_customer_sk#16, c_current_addr_sk#17] + +(22) CometFilter +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_current_addr_sk#17)) + +(23) CometBroadcastExchange +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [c_customer_sk#16, c_current_addr_sk#17] + +(24) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] +Right output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [ss_customer_sk#2], [c_customer_sk#16], Inner, BuildRight + +(25) CometProject +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#16, c_current_addr_sk#17] +Arguments: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17], [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] + +(26) ReusedExchange [Reuses operator id: 6] +Output [1]: [ca_address_sk#18] + +(27) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] +Right output [1]: [ca_address_sk#18] +Arguments: [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + +(28) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17, ca_address_sk#18] +Arguments: [ss_item_sk#1, ss_ext_sales_price#5], [ss_item_sk#1, ss_ext_sales_price#5] + +(29) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#19, i_category#20] +Arguments: [i_item_sk#19, i_category#20] + +(30) CometFilter +Input [2]: [i_item_sk#19, i_category#20] +Condition : ((isnotnull(i_category#20) AND (i_category#20 = Jewelry )) AND isnotnull(i_item_sk#19)) + +(31) CometProject +Input [2]: [i_item_sk#19, i_category#20] +Arguments: [i_item_sk#19], [i_item_sk#19] + +(32) CometBroadcastExchange +Input [1]: [i_item_sk#19] +Arguments: [i_item_sk#19] + +(33) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#5] +Right output [1]: [i_item_sk#19] +Arguments: [ss_item_sk#1], [i_item_sk#19], Inner, BuildRight + +(34) CometProject +Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#19] +Arguments: [ss_ext_sales_price#5], [ss_ext_sales_price#5] + +(35) CometHashAggregate +Input [1]: [ss_ext_sales_price#5] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(36) CometExchange +Input [1]: [sum#21] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(37) CometHashAggregate +Input [1]: [sum#21] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(38) ColumnarToRow [codegen id : 2] +Input [1]: [promotions#22] + +(39) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#23, ss_customer_sk#24, ss_store_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] +Arguments: [ss_item_sk#23, ss_customer_sk#24, ss_store_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] + +(40) CometFilter +Input [5]: [ss_item_sk#23, ss_customer_sk#24, ss_store_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] +Condition : ((isnotnull(ss_store_sk#25) AND isnotnull(ss_customer_sk#24)) AND isnotnull(ss_item_sk#23)) + +(41) ReusedExchange [Reuses operator id: 6] +Output [1]: [s_store_sk#28] + +(42) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#23, ss_customer_sk#24, ss_store_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] +Right output [1]: [s_store_sk#28] +Arguments: [ss_store_sk#25], [s_store_sk#28], Inner, BuildRight + +(43) CometProject +Input [6]: [ss_item_sk#23, ss_customer_sk#24, ss_store_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27, s_store_sk#28] +Arguments: [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26, ss_sold_date_sk#27], [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26, ss_sold_date_sk#27] + +(44) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#29] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26, ss_sold_date_sk#27] +Right output [1]: [d_date_sk#29] +Arguments: [ss_sold_date_sk#27], [d_date_sk#29], Inner, BuildRight + +(46) CometProject +Input [5]: [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26, ss_sold_date_sk#27, d_date_sk#29] +Arguments: [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26], [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26] + +(47) ReusedExchange [Reuses operator id: 23] +Output [2]: [c_customer_sk#30, c_current_addr_sk#31] + +(48) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26] +Right output [2]: [c_customer_sk#30, c_current_addr_sk#31] +Arguments: [ss_customer_sk#24], [c_customer_sk#30], Inner, BuildRight + +(49) CometProject +Input [5]: [ss_item_sk#23, ss_customer_sk#24, ss_ext_sales_price#26, c_customer_sk#30, c_current_addr_sk#31] +Arguments: [ss_item_sk#23, ss_ext_sales_price#26, c_current_addr_sk#31], [ss_item_sk#23, ss_ext_sales_price#26, c_current_addr_sk#31] + +(50) ReusedExchange [Reuses operator id: 6] +Output [1]: [ca_address_sk#32] + +(51) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#23, ss_ext_sales_price#26, c_current_addr_sk#31] +Right output [1]: [ca_address_sk#32] +Arguments: [c_current_addr_sk#31], [ca_address_sk#32], Inner, BuildRight + +(52) CometProject +Input [4]: [ss_item_sk#23, ss_ext_sales_price#26, c_current_addr_sk#31, ca_address_sk#32] +Arguments: [ss_item_sk#23, ss_ext_sales_price#26], [ss_item_sk#23, ss_ext_sales_price#26] + +(53) ReusedExchange [Reuses operator id: 32] +Output [1]: [i_item_sk#33] + +(54) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#23, ss_ext_sales_price#26] +Right output [1]: [i_item_sk#33] +Arguments: [ss_item_sk#23], [i_item_sk#33], Inner, BuildRight + +(55) CometProject +Input [3]: [ss_item_sk#23, ss_ext_sales_price#26, i_item_sk#33] +Arguments: [ss_ext_sales_price#26], [ss_ext_sales_price#26] + +(56) CometHashAggregate +Input [1]: [ss_ext_sales_price#26] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#26))] + +(57) CometExchange +Input [1]: [sum#34] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(58) CometHashAggregate +Input [1]: [sum#34] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#26))] + +(59) ColumnarToRow [codegen id : 1] +Input [1]: [total#35] + +(60) BroadcastExchange +Input [1]: [total#35] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(61) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 2] +Output [3]: [promotions#22, total#35, ((cast(promotions#22 as decimal(15,4)) / cast(total#35 as decimal(15,4))) * 100) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#36] +Input [2]: [promotions#22, total#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f2bc0148e2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_datafusion/simplified.txt @@ -0,0 +1,67 @@ +WholeStageCodegen (2) + Project [promotions,total] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [promotions,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange #1 + CometHashAggregate [sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,c_current_addr_sk,ca_address_sk] + CometProject [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,c_customer_sk,c_current_addr_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk,p_promo_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk,s_store_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_gmt_offset] + CometBroadcastExchange [p_promo_sk] #3 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #5 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + ReusedExchange [ca_address_sk] #2 + CometBroadcastExchange [i_item_sk] #6 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [total,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange #8 + CometHashAggregate [sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,c_current_addr_sk,ca_address_sk] + CometProject [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,c_customer_sk,c_current_addr_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,s_store_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [s_store_sk] #2 + ReusedExchange [d_date_sk] #4 + ReusedExchange [c_customer_sk,c_current_addr_sk] #5 + ReusedExchange [ca_address_sk] #2 + ReusedExchange [i_item_sk] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2a009f9c2d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +* Project (65) ++- * BroadcastNestedLoopJoin Inner BuildRight (64) + :- * ColumnarToRow (41) + : +- CometHashAggregate (40) + : +- CometExchange (39) + : +- CometHashAggregate (38) + : +- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometProject (31) + : : +- CometBroadcastHashJoin (30) + : : :- CometProject (25) + : : : +- CometBroadcastHashJoin (24) + : : : :- CometProject (20) + : : : : +- CometBroadcastHashJoin (19) + : : : : :- CometProject (14) + : : : : : +- CometBroadcastHashJoin (13) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : +- CometBroadcastExchange (6) + : : : : : : +- CometProject (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.store (3) + : : : : : +- CometBroadcastExchange (12) + : : : : : +- CometProject (11) + : : : : : +- CometFilter (10) + : : : : : +- CometScan parquet spark_catalog.default.promotion (9) + : : : : +- CometBroadcastExchange (18) + : : : : +- CometProject (17) + : : : : +- CometFilter (16) + : : : : +- CometScan parquet spark_catalog.default.date_dim (15) + : : : +- CometBroadcastExchange (23) + : : : +- CometFilter (22) + : : : +- CometScan parquet spark_catalog.default.customer (21) + : : +- CometBroadcastExchange (29) + : : +- CometProject (28) + : : +- CometFilter (27) + : : +- CometScan parquet spark_catalog.default.customer_address (26) + : +- CometBroadcastExchange (35) + : +- CometProject (34) + : +- CometFilter (33) + : +- CometScan parquet spark_catalog.default.item (32) + +- BroadcastExchange (63) + +- * ColumnarToRow (62) + +- CometHashAggregate (61) + +- CometExchange (60) + +- CometHashAggregate (59) + +- CometProject (58) + +- CometBroadcastHashJoin (57) + :- CometProject (55) + : +- CometBroadcastHashJoin (54) + : :- CometProject (52) + : : +- CometBroadcastHashJoin (51) + : : :- CometProject (49) + : : : +- CometBroadcastHashJoin (48) + : : : :- CometProject (46) + : : : : +- CometBroadcastHashJoin (45) + : : : : :- CometFilter (43) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (42) + : : : : +- ReusedExchange (44) + : : : +- ReusedExchange (47) + : : +- ReusedExchange (50) + : +- ReusedExchange (53) + +- ReusedExchange (56) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_store_sk#3) AND isnotnull(ss_promo_sk#4)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(5) CometProject +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Arguments: [s_store_sk#7], [s_store_sk#7] + +(6) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(7) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#3], [s_store_sk#7], Inner, BuildRight + +(8) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, s_store_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6], [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(9) CometScan parquet spark_catalog.default.promotion +Output [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(10) CometFilter +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Condition : ((((p_channel_dmail#10 = Y) OR (p_channel_email#11 = Y)) OR (p_channel_tv#12 = Y)) AND isnotnull(p_promo_sk#9)) + +(11) CometProject +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Arguments: [p_promo_sk#9], [p_promo_sk#9] + +(12) CometBroadcastExchange +Input [1]: [p_promo_sk#9] +Arguments: [p_promo_sk#9] + +(13) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [p_promo_sk#9] +Arguments: [ss_promo_sk#4], [p_promo_sk#9], Inner, BuildRight + +(14) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6], [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(15) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(16) CometFilter +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((((isnotnull(d_year#14) AND isnotnull(d_moy#15)) AND (d_year#14 = 1998)) AND (d_moy#15 = 11)) AND isnotnull(d_date_sk#13)) + +(17) CometProject +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(18) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(19) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#6], [d_date_sk#13], Inner, BuildRight + +(20) CometProject +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5], [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] + +(21) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(22) CometFilter +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_current_addr_sk#17)) + +(23) CometBroadcastExchange +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [c_customer_sk#16, c_current_addr_sk#17] + +(24) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] +Right output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [ss_customer_sk#2], [c_customer_sk#16], Inner, BuildRight + +(25) CometProject +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#16, c_current_addr_sk#17] +Arguments: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17], [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] + +(26) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#18, ca_gmt_offset#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(27) CometFilter +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] +Condition : ((isnotnull(ca_gmt_offset#19) AND (ca_gmt_offset#19 = -5.00)) AND isnotnull(ca_address_sk#18)) + +(28) CometProject +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] +Arguments: [ca_address_sk#18], [ca_address_sk#18] + +(29) CometBroadcastExchange +Input [1]: [ca_address_sk#18] +Arguments: [ca_address_sk#18] + +(30) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] +Right output [1]: [ca_address_sk#18] +Arguments: [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + +(31) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17, ca_address_sk#18] +Arguments: [ss_item_sk#1, ss_ext_sales_price#5], [ss_item_sk#1, ss_ext_sales_price#5] + +(32) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#20, i_category#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(33) CometFilter +Input [2]: [i_item_sk#20, i_category#21] +Condition : ((isnotnull(i_category#21) AND (i_category#21 = Jewelry )) AND isnotnull(i_item_sk#20)) + +(34) CometProject +Input [2]: [i_item_sk#20, i_category#21] +Arguments: [i_item_sk#20], [i_item_sk#20] + +(35) CometBroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: [i_item_sk#20] + +(36) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#5] +Right output [1]: [i_item_sk#20] +Arguments: [ss_item_sk#1], [i_item_sk#20], Inner, BuildRight + +(37) CometProject +Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#20] +Arguments: [ss_ext_sales_price#5], [ss_ext_sales_price#5] + +(38) CometHashAggregate +Input [1]: [ss_ext_sales_price#5] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] + +(39) CometExchange +Input [1]: [sum#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(40) CometHashAggregate +Input [1]: [sum#22] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] + +(41) ColumnarToRow [codegen id : 2] +Input [1]: [promotions#23] + +(42) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#24, ss_customer_sk#25, ss_store_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#28)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(43) CometFilter +Input [5]: [ss_item_sk#24, ss_customer_sk#25, ss_store_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] +Condition : ((isnotnull(ss_store_sk#26) AND isnotnull(ss_customer_sk#25)) AND isnotnull(ss_item_sk#24)) + +(44) ReusedExchange [Reuses operator id: 6] +Output [1]: [s_store_sk#29] + +(45) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#24, ss_customer_sk#25, ss_store_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] +Right output [1]: [s_store_sk#29] +Arguments: [ss_store_sk#26], [s_store_sk#29], Inner, BuildRight + +(46) CometProject +Input [6]: [ss_item_sk#24, ss_customer_sk#25, ss_store_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28, s_store_sk#29] +Arguments: [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27, ss_sold_date_sk#28], [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27, ss_sold_date_sk#28] + +(47) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#30] + +(48) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27, ss_sold_date_sk#28] +Right output [1]: [d_date_sk#30] +Arguments: [ss_sold_date_sk#28], [d_date_sk#30], Inner, BuildRight + +(49) CometProject +Input [5]: [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27, ss_sold_date_sk#28, d_date_sk#30] +Arguments: [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27], [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27] + +(50) ReusedExchange [Reuses operator id: 23] +Output [2]: [c_customer_sk#31, c_current_addr_sk#32] + +(51) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27] +Right output [2]: [c_customer_sk#31, c_current_addr_sk#32] +Arguments: [ss_customer_sk#25], [c_customer_sk#31], Inner, BuildRight + +(52) CometProject +Input [5]: [ss_item_sk#24, ss_customer_sk#25, ss_ext_sales_price#27, c_customer_sk#31, c_current_addr_sk#32] +Arguments: [ss_item_sk#24, ss_ext_sales_price#27, c_current_addr_sk#32], [ss_item_sk#24, ss_ext_sales_price#27, c_current_addr_sk#32] + +(53) ReusedExchange [Reuses operator id: 29] +Output [1]: [ca_address_sk#33] + +(54) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#24, ss_ext_sales_price#27, c_current_addr_sk#32] +Right output [1]: [ca_address_sk#33] +Arguments: [c_current_addr_sk#32], [ca_address_sk#33], Inner, BuildRight + +(55) CometProject +Input [4]: [ss_item_sk#24, ss_ext_sales_price#27, c_current_addr_sk#32, ca_address_sk#33] +Arguments: [ss_item_sk#24, ss_ext_sales_price#27], [ss_item_sk#24, ss_ext_sales_price#27] + +(56) ReusedExchange [Reuses operator id: 35] +Output [1]: [i_item_sk#34] + +(57) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#24, ss_ext_sales_price#27] +Right output [1]: [i_item_sk#34] +Arguments: [ss_item_sk#24], [i_item_sk#34], Inner, BuildRight + +(58) CometProject +Input [3]: [ss_item_sk#24, ss_ext_sales_price#27, i_item_sk#34] +Arguments: [ss_ext_sales_price#27], [ss_ext_sales_price#27] + +(59) CometHashAggregate +Input [1]: [ss_ext_sales_price#27] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] + +(60) CometExchange +Input [1]: [sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(61) CometHashAggregate +Input [1]: [sum#35] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] + +(62) ColumnarToRow [codegen id : 1] +Input [1]: [total#36] + +(63) BroadcastExchange +Input [1]: [total#36] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(64) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 2] +Output [3]: [promotions#23, total#36, ((cast(promotions#23 as decimal(15,4)) / cast(total#36 as decimal(15,4))) * 100) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#37] +Input [2]: [promotions#23, total#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..e72b12c3d6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q61.native_iceberg_compat/simplified.txt @@ -0,0 +1,70 @@ +WholeStageCodegen (2) + Project [promotions,total] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [promotions,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange #1 + CometHashAggregate [sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,c_current_addr_sk,ca_address_sk] + CometProject [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,c_customer_sk,c_current_addr_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk,p_promo_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk,s_store_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_gmt_offset] + CometScan parquet spark_catalog.default.store [s_store_sk,s_gmt_offset] + CometBroadcastExchange [p_promo_sk] #3 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + CometScan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #5 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk] #7 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [total,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange #9 + CometHashAggregate [sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,c_current_addr_sk,ca_address_sk] + CometProject [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,c_customer_sk,c_current_addr_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,s_store_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [s_store_sk] #2 + ReusedExchange [d_date_sk] #4 + ReusedExchange [c_customer_sk,c_current_addr_sk] #5 + ReusedExchange [ca_address_sk] #6 + ReusedExchange [i_item_sk] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/explain.txt new file mode 100644 index 0000000000..d555809a8e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometTakeOrderedAndProject (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : : +- ReusedExchange (8) + : +- ReusedExchange (11) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_ship_mode_sk#3)) AND isnotnull(ws_web_site_sk#2)) AND isnotnull(ws_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(4) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [ws_warehouse_sk#4], [w_warehouse_sk#6], Inner, BuildRight + +(7) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7], [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] + +(8) ReusedExchange [Reuses operator id: 5] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] + +(9) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] +Right output [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [ws_ship_mode_sk#3], [sm_ship_mode_sk#8], Inner, BuildRight + +(10) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9], [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] + +(11) ReusedExchange [Reuses operator id: 5] +Output [2]: [web_site_sk#10, web_name#11] + +(12) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Right output [2]: [web_site_sk#10, web_name#11] +Arguments: [ws_web_site_sk#2], [web_site_sk#10], Inner, BuildRight + +(13) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_site_sk#10, web_name#11] +Arguments: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11], [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12, d_month_seq#13] + +(15) CometFilter +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(16) CometProject +Input [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(18) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] +Right output [1]: [d_date_sk#12] +Arguments: [ws_ship_date_sk#1], [d_date_sk#12], Inner, BuildRight + +(19) CometProject +Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11, d_date_sk#12] +Arguments: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14], [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] + +(20) CometHashAggregate +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(21) CometExchange +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, web_name#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(23) CometTakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST,sm_type#9 ASC NULLS FIRST,web_name#11 ASC NULLS FIRST], output=[substr(w_warehouse_name, 1, 20)#20,sm_type#9,web_name#11,30 days #21,31 - 60 days #22,61 - 90 days #23,91 - 120 days #24,>120 days #25]), [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25], 100, [substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, web_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + +(24) ColumnarToRow [codegen id : 1] +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d7ce3a3184 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + CometHashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,_groupingexpression,sum,sum,sum,sum,sum,sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END)] + CometExchange [_groupingexpression,sm_type,web_name] #1 + CometHashAggregate [_groupingexpression,sm_type,web_name,sum,sum,sum,sum,sum,ws_ship_date_sk,ws_sold_date_sk] + CometProject [w_warehouse_name] [ws_ship_date_sk,ws_sold_date_sk,sm_type,web_name,_groupingexpression] + CometBroadcastHashJoin [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name,d_date_sk] + CometProject [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_site_sk,web_name] + CometProject [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name,sm_ship_mode_sk,sm_type] + CometProject [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [sm_ship_mode_sk,sm_type] #2 + ReusedExchange [web_site_sk,web_name] #2 + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..c174784d38 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/explain.txt @@ -0,0 +1,163 @@ +== Physical Plan == +* ColumnarToRow (28) ++- CometTakeOrderedAndProject (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.warehouse (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.ship_mode (8) + : +- CometBroadcastExchange (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.web_site (13) + +- CometBroadcastExchange (21) + +- CometProject (20) + +- CometFilter (19) + +- CometScan parquet spark_catalog.default.date_dim (18) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_ship_mode_sk#3)) AND isnotnull(ws_web_site_sk#2)) AND isnotnull(ws_ship_date_sk#1)) + +(3) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [ws_warehouse_sk#4], [w_warehouse_sk#6], Inner, BuildRight + +(7) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7], [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] + +(8) CometScan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_type#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) + +(10) CometBroadcastExchange +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [sm_ship_mode_sk#8, sm_type#9] + +(11) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] +Right output [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [ws_ship_mode_sk#3], [sm_ship_mode_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9], [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] + +(13) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#10, web_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [web_site_sk#10, web_name#11] +Condition : isnotnull(web_site_sk#10) + +(15) CometBroadcastExchange +Input [2]: [web_site_sk#10, web_name#11] +Arguments: [web_site_sk#10, web_name#11] + +(16) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Right output [2]: [web_site_sk#10, web_name#11] +Arguments: [ws_web_site_sk#2], [web_site_sk#10], Inner, BuildRight + +(17) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_site_sk#10, web_name#11] +Arguments: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11], [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] + +(18) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_month_seq#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(20) CometProject +Input [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(21) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(22) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] +Right output [1]: [d_date_sk#12] +Arguments: [ws_ship_date_sk#1], [d_date_sk#12], Inner, BuildRight + +(23) CometProject +Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11, d_date_sk#12] +Arguments: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14], [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] + +(24) CometHashAggregate +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(25) CometExchange +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, web_name#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(26) CometHashAggregate +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(27) CometTakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST,sm_type#9 ASC NULLS FIRST,web_name#11 ASC NULLS FIRST], output=[substr(w_warehouse_name, 1, 20)#20,sm_type#9,web_name#11,30 days #21,31 - 60 days #22,61 - 90 days #23,91 - 120 days #24,>120 days #25]), [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25], 100, [substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, web_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + +(28) ColumnarToRow [codegen id : 1] +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, web_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6a9b1ced35 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q62.native_iceberg_compat/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + CometHashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,_groupingexpression,sum,sum,sum,sum,sum,sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END)] + CometExchange [_groupingexpression,sm_type,web_name] #1 + CometHashAggregate [_groupingexpression,sm_type,web_name,sum,sum,sum,sum,sum,ws_ship_date_sk,ws_sold_date_sk] + CometProject [w_warehouse_name] [ws_ship_date_sk,ws_sold_date_sk,sm_type,web_name,_groupingexpression] + CometBroadcastHashJoin [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name,d_date_sk] + CometProject [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_site_sk,web_name] + CometProject [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name,sm_ship_mode_sk,sm_type] + CometProject [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [sm_ship_mode_sk,sm_type] #3 + CometFilter [sm_ship_mode_sk,sm_type] + CometScan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_type] + CometBroadcastExchange [web_site_sk,web_name] #4 + CometFilter [web_site_sk,web_name] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_name] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/explain.txt new file mode 100644 index 0000000000..455002dee6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- * Filter (27) + +- Window (26) + +- * ColumnarToRow (25) + +- CometSort (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (14) + : +- CometBroadcastHashJoin (13) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : +- CometBroadcastExchange (12) + : +- CometProject (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + +- CometBroadcastExchange (17) + +- CometFilter (16) + +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Arguments: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,refernece ,self-help )) AND i_brand#2 IN (scholaramalgamalg #6 ,scholaramalgamalg #7 ,exportiunivamalg #8 ,scholaramalgamalg #8 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Arguments: [i_item_sk#1, i_manager_id#5], [i_item_sk#1, i_manager_id#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(5) CometFilter +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(6) CometBroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) CometBroadcastHashJoin +Left output [2]: [i_item_sk#1, i_manager_id#5] +Right output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_item_sk#1], [ss_item_sk#10], Inner, BuildRight + +(8) CometProject +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13], [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Arguments: [d_date_sk#14, d_month_seq#15, d_moy#16] + +(10) CometFilter +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(11) CometProject +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Arguments: [d_date_sk#14, d_moy#16], [d_date_sk#14, d_moy#16] + +(12) CometBroadcastExchange +Input [2]: [d_date_sk#14, d_moy#16] +Arguments: [d_date_sk#14, d_moy#16] + +(13) CometBroadcastHashJoin +Left output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_moy#16] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(14) CometProject +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_moy#16] +Arguments: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16], [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(16) CometFilter +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(18) CometBroadcastHashJoin +Left output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Right output [1]: [s_store_sk#17] +Arguments: [ss_store_sk#11], [s_store_sk#17], Inner, BuildRight + +(19) CometProject +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] +Arguments: [i_manager_id#5, ss_sales_price#12, d_moy#16], [i_manager_id#5, ss_sales_price#12, d_moy#16] + +(20) CometHashAggregate +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] + +(21) CometExchange +Input [3]: [i_manager_id#5, d_moy#16, sum#18] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [3]: [i_manager_id#5, d_moy#16, sum#18] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] + +(23) CometExchange +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] +Arguments: [i_manager_id#5, sum_sales#19, _w0#20], [i_manager_id#5 ASC NULLS FIRST] + +(25) ColumnarToRow [codegen id : 1] +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] + +(26) Window +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] +Arguments: [avg(_w0#20) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#21], [i_manager_id#5] + +(27) Filter [codegen id : 2] +Input [4]: [i_manager_id#5, sum_sales#19, _w0#20, avg_monthly_sales#21] +Condition : CASE WHEN (avg_monthly_sales#21 > 0.000000) THEN ((abs((sum_sales#19 - avg_monthly_sales#21)) / avg_monthly_sales#21) > 0.1000000000000000) ELSE false END + +(28) Project [codegen id : 2] +Output [3]: [i_manager_id#5, sum_sales#19, avg_monthly_sales#21] +Input [4]: [i_manager_id#5, sum_sales#19, _w0#20, avg_monthly_sales#21] + +(29) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#19, avg_monthly_sales#21] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#21 ASC NULLS FIRST, sum_sales#19 ASC NULLS FIRST], [i_manager_id#5, sum_sales#19, avg_monthly_sales#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/simplified.txt new file mode 100644 index 0000000000..081555fe51 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_datafusion/simplified.txt @@ -0,0 +1,33 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen (2) + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_manager_id,sum_sales,_w0] + CometExchange [i_manager_id] #1 + CometHashAggregate [i_manager_id,sum_sales,_w0,d_moy,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_manager_id,d_moy] #2 + CometHashAggregate [i_manager_id,d_moy,sum,ss_sales_price] + CometProject [i_manager_id,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_manager_id,ss_store_sk,ss_sales_price,d_moy,s_store_sk] + CometProject [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_moy] + CometProject [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_manager_id,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [i_item_sk,i_manager_id] + CometFilter [i_item_sk,i_brand,i_class,i_category,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_manager_id] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_moy] #4 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_month_seq,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_moy] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..39bed8e33d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/explain.txt @@ -0,0 +1,165 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- * Filter (27) + +- Window (26) + +- * ColumnarToRow (25) + +- CometSort (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (14) + : +- CometBroadcastHashJoin (13) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.item (1) + : : +- CometBroadcastExchange (6) + : : +- CometFilter (5) + : : +- CometScan parquet spark_catalog.default.store_sales (4) + : +- CometBroadcastExchange (12) + : +- CometProject (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.date_dim (9) + +- CometBroadcastExchange (17) + +- CometFilter (16) + +- CometScan parquet spark_catalog.default.store (15) + + +(1) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books ,Children ,Electronics ]),In(i_class, [personal ,portable ,refernece ,self-help ])),In(i_brand, [exportiunivamalg #6 ,scholaramalgamalg #7 ,scholaramalgamalg #8 ,scholaramalgamalg #6 ])),And(And(In(i_category, [Men ,Music ,Women ]),In(i_class, [accessories ,classical ,fragrances ,pants ])),In(i_brand, [amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,refernece ,self-help )) AND i_brand#2 IN (scholaramalgamalg #7 ,scholaramalgamalg #8 ,exportiunivamalg #6 ,scholaramalgamalg #6 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Arguments: [i_item_sk#1, i_manager_id#5], [i_item_sk#1, i_manager_id#5] + +(4) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) CometFilter +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(6) CometBroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) CometBroadcastHashJoin +Left output [2]: [i_item_sk#1, i_manager_id#5] +Right output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_item_sk#1], [ss_item_sk#10], Inner, BuildRight + +(8) CometProject +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13], [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(11) CometProject +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Arguments: [d_date_sk#14, d_moy#16], [d_date_sk#14, d_moy#16] + +(12) CometBroadcastExchange +Input [2]: [d_date_sk#14, d_moy#16] +Arguments: [d_date_sk#14, d_moy#16] + +(13) CometBroadcastHashJoin +Left output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_moy#16] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(14) CometProject +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_moy#16] +Arguments: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16], [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] + +(15) CometScan parquet spark_catalog.default.store +Output [1]: [s_store_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(16) CometFilter +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(18) CometBroadcastHashJoin +Left output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Right output [1]: [s_store_sk#17] +Arguments: [ss_store_sk#11], [s_store_sk#17], Inner, BuildRight + +(19) CometProject +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] +Arguments: [i_manager_id#5, ss_sales_price#12, d_moy#16], [i_manager_id#5, ss_sales_price#12, d_moy#16] + +(20) CometHashAggregate +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] + +(21) CometExchange +Input [3]: [i_manager_id#5, d_moy#16, sum#18] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [3]: [i_manager_id#5, d_moy#16, sum#18] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] + +(23) CometExchange +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] +Arguments: [i_manager_id#5, sum_sales#19, _w0#20], [i_manager_id#5 ASC NULLS FIRST] + +(25) ColumnarToRow [codegen id : 1] +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] + +(26) Window +Input [3]: [i_manager_id#5, sum_sales#19, _w0#20] +Arguments: [avg(_w0#20) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#21], [i_manager_id#5] + +(27) Filter [codegen id : 2] +Input [4]: [i_manager_id#5, sum_sales#19, _w0#20, avg_monthly_sales#21] +Condition : CASE WHEN (avg_monthly_sales#21 > 0.000000) THEN ((abs((sum_sales#19 - avg_monthly_sales#21)) / avg_monthly_sales#21) > 0.1000000000000000) ELSE false END + +(28) Project [codegen id : 2] +Output [3]: [i_manager_id#5, sum_sales#19, avg_monthly_sales#21] +Input [4]: [i_manager_id#5, sum_sales#19, _w0#20, avg_monthly_sales#21] + +(29) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#19, avg_monthly_sales#21] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#21 ASC NULLS FIRST, sum_sales#19 ASC NULLS FIRST], [i_manager_id#5, sum_sales#19, avg_monthly_sales#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2d65cbf15f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q63.native_iceberg_compat/simplified.txt @@ -0,0 +1,33 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen (2) + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_manager_id,sum_sales,_w0] + CometExchange [i_manager_id] #1 + CometHashAggregate [i_manager_id,sum_sales,_w0,d_moy,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_manager_id,d_moy] #2 + CometHashAggregate [i_manager_id,d_moy,sum,ss_sales_price] + CometProject [i_manager_id,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_manager_id,ss_store_sk,ss_sales_price,d_moy,s_store_sk] + CometProject [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_moy] + CometProject [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_manager_id,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [i_item_sk,i_manager_id] + CometFilter [i_item_sk,i_brand,i_class,i_category,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_manager_id] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_moy] #4 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_month_seq,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_moy] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometScan parquet spark_catalog.default.store [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/explain.txt new file mode 100644 index 0000000000..dffc690465 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/explain.txt @@ -0,0 +1,837 @@ +== Physical Plan == +* ColumnarToRow (163) ++- CometSort (162) + +- CometColumnarExchange (161) + +- CometProject (160) + +- CometSortMergeJoin (159) + :- CometSort (98) + : +- CometExchange (97) + : +- CometHashAggregate (96) + : +- CometHashAggregate (95) + : +- CometProject (94) + : +- CometBroadcastHashJoin (93) + : :- CometProject (88) + : : +- CometBroadcastHashJoin (87) + : : :- CometProject (85) + : : : +- CometBroadcastHashJoin (84) + : : : :- CometProject (82) + : : : : +- CometBroadcastHashJoin (81) + : : : : :- CometProject (79) + : : : : : +- CometBroadcastHashJoin (78) + : : : : : :- CometProject (74) + : : : : : : +- CometBroadcastHashJoin (73) + : : : : : : :- CometProject (71) + : : : : : : : +- CometBroadcastHashJoin (70) + : : : : : : : :- CometProject (66) + : : : : : : : : +- CometBroadcastHashJoin (65) + : : : : : : : : :- CometProject (61) + : : : : : : : : : +- CometBroadcastHashJoin (60) + : : : : : : : : : :- CometProject (58) + : : : : : : : : : : +- CometBroadcastHashJoin (57) + : : : : : : : : : : :- CometProject (53) + : : : : : : : : : : : +- CometBroadcastHashJoin (52) + : : : : : : : : : : : :- CometProject (50) + : : : : : : : : : : : : +- CometBroadcastHashJoin (49) + : : : : : : : : : : : : :- CometProject (45) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (44) + : : : : : : : : : : : : : :- CometProject (40) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (39) + : : : : : : : : : : : : : : :- CometProject (35) + : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (34) + : : : : : : : : : : : : : : : :- CometProject (30) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (29) + : : : : : : : : : : : : : : : : :- CometSort (10) + : : : : : : : : : : : : : : : : : +- CometExchange (9) + : : : : : : : : : : : : : : : : : +- CometProject (8) + : : : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : : : : : : : : : : : : :- CometBroadcastExchange (3) + : : : : : : : : : : : : : : : : : : +- CometFilter (2) + : : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : : : : : : : : : : : : +- CometProject (6) + : : : : : : : : : : : : : : : : : +- CometFilter (5) + : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (4) + : : : : : : : : : : : : : : : : +- CometSort (28) + : : : : : : : : : : : : : : : : +- CometProject (27) + : : : : : : : : : : : : : : : : +- CometFilter (26) + : : : : : : : : : : : : : : : : +- CometHashAggregate (25) + : : : : : : : : : : : : : : : : +- CometExchange (24) + : : : : : : : : : : : : : : : : +- CometHashAggregate (23) + : : : : : : : : : : : : : : : : +- CometProject (22) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (21) + : : : : : : : : : : : : : : : : :- CometSort (15) + : : : : : : : : : : : : : : : : : +- CometExchange (14) + : : : : : : : : : : : : : : : : : +- CometProject (13) + : : : : : : : : : : : : : : : : : +- CometFilter (12) + : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (11) + : : : : : : : : : : : : : : : : +- CometSort (20) + : : : : : : : : : : : : : : : : +- CometExchange (19) + : : : : : : : : : : : : : : : : +- CometProject (18) + : : : : : : : : : : : : : : : : +- CometFilter (17) + : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (16) + : : : : : : : : : : : : : : : +- CometBroadcastExchange (33) + : : : : : : : : : : : : : : : +- CometFilter (32) + : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (31) + : : : : : : : : : : : : : : +- CometBroadcastExchange (38) + : : : : : : : : : : : : : : +- CometFilter (37) + : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (36) + : : : : : : : : : : : : : +- CometBroadcastExchange (43) + : : : : : : : : : : : : : +- CometFilter (42) + : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (41) + : : : : : : : : : : : : +- CometBroadcastExchange (48) + : : : : : : : : : : : : +- CometFilter (47) + : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (46) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- CometBroadcastExchange (56) + : : : : : : : : : : +- CometFilter (55) + : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (54) + : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : +- CometBroadcastExchange (64) + : : : : : : : : +- CometFilter (63) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (62) + : : : : : : : +- CometBroadcastExchange (69) + : : : : : : : +- CometFilter (68) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (67) + : : : : : : +- ReusedExchange (72) + : : : : : +- CometBroadcastExchange (77) + : : : : : +- CometFilter (76) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (75) + : : : : +- ReusedExchange (80) + : : : +- ReusedExchange (83) + : : +- ReusedExchange (86) + : +- CometBroadcastExchange (92) + : +- CometProject (91) + : +- CometFilter (90) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (89) + +- CometSort (158) + +- CometExchange (157) + +- CometHashAggregate (156) + +- CometHashAggregate (155) + +- CometProject (154) + +- CometBroadcastHashJoin (153) + :- CometProject (151) + : +- CometBroadcastHashJoin (150) + : :- CometProject (148) + : : +- CometBroadcastHashJoin (147) + : : :- CometProject (145) + : : : +- CometBroadcastHashJoin (144) + : : : :- CometProject (142) + : : : : +- CometBroadcastHashJoin (141) + : : : : :- CometProject (139) + : : : : : +- CometBroadcastHashJoin (138) + : : : : : :- CometProject (136) + : : : : : : +- CometBroadcastHashJoin (135) + : : : : : : :- CometProject (133) + : : : : : : : +- CometBroadcastHashJoin (132) + : : : : : : : :- CometProject (130) + : : : : : : : : +- CometBroadcastHashJoin (129) + : : : : : : : : :- CometProject (127) + : : : : : : : : : +- CometBroadcastHashJoin (126) + : : : : : : : : : :- CometProject (124) + : : : : : : : : : : +- CometBroadcastHashJoin (123) + : : : : : : : : : : :- CometProject (121) + : : : : : : : : : : : +- CometBroadcastHashJoin (120) + : : : : : : : : : : : :- CometProject (118) + : : : : : : : : : : : : +- CometBroadcastHashJoin (117) + : : : : : : : : : : : : :- CometProject (115) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (114) + : : : : : : : : : : : : : :- CometProject (112) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (111) + : : : : : : : : : : : : : : :- CometProject (107) + : : : : : : : : : : : : : : : +- CometSortMergeJoin (106) + : : : : : : : : : : : : : : : :- CometSort (100) + : : : : : : : : : : : : : : : : +- ReusedExchange (99) + : : : : : : : : : : : : : : : +- CometSort (105) + : : : : : : : : : : : : : : : +- CometProject (104) + : : : : : : : : : : : : : : : +- CometFilter (103) + : : : : : : : : : : : : : : : +- CometHashAggregate (102) + : : : : : : : : : : : : : : : +- ReusedExchange (101) + : : : : : : : : : : : : : : +- CometBroadcastExchange (110) + : : : : : : : : : : : : : : +- CometFilter (109) + : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (108) + : : : : : : : : : : : : : +- ReusedExchange (113) + : : : : : : : : : : : : +- ReusedExchange (116) + : : : : : : : : : : : +- ReusedExchange (119) + : : : : : : : : : : +- ReusedExchange (122) + : : : : : : : : : +- ReusedExchange (125) + : : : : : : : : +- ReusedExchange (128) + : : : : : : : +- ReusedExchange (131) + : : : : : : +- ReusedExchange (134) + : : : : : +- ReusedExchange (137) + : : : : +- ReusedExchange (140) + : : : +- ReusedExchange (143) + : : +- ReusedExchange (146) + : +- ReusedExchange (149) + +- ReusedExchange (152) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(2) CometFilter +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(3) CometBroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(5) CometFilter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(6) CometProject +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14], [sr_item_sk#13, sr_ticket_number#14] + +(7) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_ticket_number#8], [sr_item_sk#13, sr_ticket_number#14], Inner, BuildLeft + +(8) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(9) CometExchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometSort +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1 ASC NULLS FIRST] + +(11) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(12) CometFilter +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) CometProject +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(14) CometExchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(15) CometSort +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(17) CometFilter +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(18) CometProject +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(19) CometExchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Right output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_order_number#17], [cr_item_sk#20, cr_order_number#21], Inner + +(22) CometProject +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(23) CometHashAggregate +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(24) CometExchange +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(25) CometHashAggregate +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(26) CometFilter +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(27) CometProject +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Arguments: [cs_item_sk#16], [cs_item_sk#16] + +(28) CometSort +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16], [cs_item_sk#16 ASC NULLS FIRST] + +(29) CometSortMergeJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [1]: [cs_item_sk#16] +Arguments: [ss_item_sk#1], [cs_item_sk#16], Inner + +(30) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(31) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#31, d_year#32] +Arguments: [d_date_sk#31, d_year#32] + +(32) CometFilter +Input [2]: [d_date_sk#31, d_year#32] +Condition : ((isnotnull(d_year#32) AND (d_year#32 = 1999)) AND isnotnull(d_date_sk#31)) + +(33) CometBroadcastExchange +Input [2]: [d_date_sk#31, d_year#32] +Arguments: [d_date_sk#31, d_year#32] + +(34) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#31, d_year#32] +Arguments: [ss_sold_date_sk#12], [d_date_sk#31], Inner, BuildRight + +(35) CometProject +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#31, d_year#32] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] + +(36) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [s_store_sk#33, s_store_name#34, s_zip#35] + +(37) CometFilter +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Condition : ((isnotnull(s_store_sk#33) AND isnotnull(s_store_name#34)) AND isnotnull(s_zip#35)) + +(38) CometBroadcastExchange +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [s_store_sk#33, s_store_name#34, s_zip#35] + +(39) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] +Right output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_store_sk#6], [s_store_sk#33], Inner, BuildRight + +(40) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] + +(41) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(42) CometFilter +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Condition : (((((isnotnull(c_customer_sk#36) AND isnotnull(c_first_sales_date_sk#41)) AND isnotnull(c_first_shipto_date_sk#40)) AND isnotnull(c_current_cdemo_sk#37)) AND isnotnull(c_current_hdemo_sk#38)) AND isnotnull(c_current_addr_sk#39)) + +(43) CometBroadcastExchange +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(44) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] +Right output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_customer_sk#2], [c_customer_sk#36], Inner, BuildRight + +(45) CometProject +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(46) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#42, d_year#43] +Arguments: [d_date_sk#42, d_year#43] + +(47) CometFilter +Input [2]: [d_date_sk#42, d_year#43] +Condition : isnotnull(d_date_sk#42) + +(48) CometBroadcastExchange +Input [2]: [d_date_sk#42, d_year#43] +Arguments: [d_date_sk#42, d_year#43] + +(49) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Right output [2]: [d_date_sk#42, d_year#43] +Arguments: [c_first_sales_date_sk#41], [d_date_sk#42], Inner, BuildRight + +(50) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41, d_date_sk#42, d_year#43] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#44, d_year#45] + +(52) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] +Right output [2]: [d_date_sk#44, d_year#45] +Arguments: [c_first_shipto_date_sk#40], [d_date_sk#44], Inner, BuildRight + +(53) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43, d_date_sk#44, d_year#45] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(54) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [cd_demo_sk#46, cd_marital_status#47] + +(55) CometFilter +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Condition : (isnotnull(cd_demo_sk#46) AND isnotnull(cd_marital_status#47)) + +(56) CometBroadcastExchange +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [cd_demo_sk#46, cd_marital_status#47] + +(57) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_cdemo_sk#3], [cd_demo_sk#46], Inner, BuildRight + +(58) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#48, cd_marital_status#49] + +(60) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] +Right output [2]: [cd_demo_sk#48, cd_marital_status#49] +Arguments: [c_current_cdemo_sk#37], [cd_demo_sk#48], Inner, NOT (cd_marital_status#47 = cd_marital_status#49), BuildRight + +(61) CometProject +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47, cd_demo_sk#48, cd_marital_status#49] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(62) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [1]: [p_promo_sk#50] +Arguments: [p_promo_sk#50] + +(63) CometFilter +Input [1]: [p_promo_sk#50] +Condition : isnotnull(p_promo_sk#50) + +(64) CometBroadcastExchange +Input [1]: [p_promo_sk#50] +Arguments: [p_promo_sk#50] + +(65) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [1]: [p_promo_sk#50] +Arguments: [ss_promo_sk#7], [p_promo_sk#50], Inner, BuildRight + +(66) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, p_promo_sk#50] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(67) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [hd_demo_sk#51, hd_income_band_sk#52] + +(68) CometFilter +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Condition : (isnotnull(hd_demo_sk#51) AND isnotnull(hd_income_band_sk#52)) + +(69) CometBroadcastExchange +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [hd_demo_sk#51, hd_income_band_sk#52] + +(70) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_hdemo_sk#4], [hd_demo_sk#51], Inner, BuildRight + +(71) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] + +(72) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#53, hd_income_band_sk#54] + +(73) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] +Right output [2]: [hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [c_current_hdemo_sk#38], [hd_demo_sk#53], Inner, BuildRight + +(74) CometProject +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] + +(75) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(76) CometFilter +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Condition : isnotnull(ca_address_sk#55) + +(77) CometBroadcastExchange +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(78) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] +Right output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_addr_sk#5], [ca_address_sk#55], Inner, BuildRight + +(79) CometProject +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(80) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(81) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Right output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [c_current_addr_sk#39], [ca_address_sk#60], Inner, BuildRight + +(82) CometProject +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(83) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#65] + +(84) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#65] +Arguments: [hd_income_band_sk#52], [ib_income_band_sk#65], Inner, BuildRight + +(85) CometProject +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#65] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(86) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#66] + +(87) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#66] +Arguments: [hd_income_band_sk#54], [ib_income_band_sk#66], Inner, BuildRight + +(88) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#66] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(89) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Arguments: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] + +(90) CometFilter +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Condition : ((((((isnotnull(i_current_price#68) AND i_color#69 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#68 >= 64.00)) AND (i_current_price#68 <= 74.00)) AND (i_current_price#68 >= 65.00)) AND (i_current_price#68 <= 79.00)) AND isnotnull(i_item_sk#67)) + +(91) CometProject +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70], [i_item_sk#67, i_product_name#70] + +(92) CometBroadcastExchange +Input [2]: [i_item_sk#67, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70] + +(93) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [2]: [i_item_sk#67, i_product_name#70] +Arguments: [ss_item_sk#1], [i_item_sk#67], Inner, BuildRight + +(94) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Arguments: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70], [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] + +(95) CometHashAggregate +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] + +(96) CometHashAggregate +Input [19]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45, count#71, sum#72, sum#73, sum#74] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] + +(97) CometExchange +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: hashpartitioning(item_sk#76, store_name#77, store_zip#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(98) CometSort +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91], [item_sk#76 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, store_zip#78 ASC NULLS FIRST] + +(99) ReusedExchange [Reuses operator id: 9] +Output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(100) CometSort +Input [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92 ASC NULLS FIRST] + +(101) ReusedExchange [Reuses operator id: 24] +Output [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] + +(102) CometHashAggregate +Input [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] +Keys [1]: [cs_item_sk#103] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#107)), sum(((cr_refunded_cash#108 + cr_reversed_charge#109) + cr_store_credit#110))] + +(103) CometFilter +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(104) CometProject +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Arguments: [cs_item_sk#103], [cs_item_sk#103] + +(105) CometSort +Input [1]: [cs_item_sk#103] +Arguments: [cs_item_sk#103], [cs_item_sk#103 ASC NULLS FIRST] + +(106) CometSortMergeJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [1]: [cs_item_sk#103] +Arguments: [ss_item_sk#92], [cs_item_sk#103], Inner + +(107) CometProject +Input [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, cs_item_sk#103] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(108) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#111, d_year#112] +Arguments: [d_date_sk#111, d_year#112] + +(109) CometFilter +Input [2]: [d_date_sk#111, d_year#112] +Condition : ((isnotnull(d_year#112) AND (d_year#112 = 2000)) AND isnotnull(d_date_sk#111)) + +(110) CometBroadcastExchange +Input [2]: [d_date_sk#111, d_year#112] +Arguments: [d_date_sk#111, d_year#112] + +(111) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [2]: [d_date_sk#111, d_year#112] +Arguments: [ss_sold_date_sk#102], [d_date_sk#111], Inner, BuildRight + +(112) CometProject +Input [13]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, d_date_sk#111, d_year#112] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] + +(113) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] + +(114) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] +Right output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_store_sk#97], [s_store_sk#113], Inner, BuildRight + +(115) CometProject +Input [14]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] + +(116) ReusedExchange [Reuses operator id: 43] +Output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(117) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] +Right output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_customer_sk#93], [c_customer_sk#116], Inner, BuildRight + +(118) CometProject +Input [18]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(119) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#122, d_year#123] + +(120) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Right output [2]: [d_date_sk#122, d_year#123] +Arguments: [c_first_sales_date_sk#121], [d_date_sk#122], Inner, BuildRight + +(121) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121, d_date_sk#122, d_year#123] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] + +(122) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#124, d_year#125] + +(123) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] +Right output [2]: [d_date_sk#124, d_year#125] +Arguments: [c_first_shipto_date_sk#120], [d_date_sk#124], Inner, BuildRight + +(124) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123, d_date_sk#124, d_year#125] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(125) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#126, cd_marital_status#127] + +(126) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_cdemo_sk#94], [cd_demo_sk#126], Inner, BuildRight + +(127) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] + +(128) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#128, cd_marital_status#129] + +(129) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] +Right output [2]: [cd_demo_sk#128, cd_marital_status#129] +Arguments: [c_current_cdemo_sk#117], [cd_demo_sk#128], Inner, NOT (cd_marital_status#127 = cd_marital_status#129), BuildRight + +(130) CometProject +Input [18]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127, cd_demo_sk#128, cd_marital_status#129] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(131) ReusedExchange [Reuses operator id: 64] +Output [1]: [p_promo_sk#130] + +(132) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [1]: [p_promo_sk#130] +Arguments: [ss_promo_sk#98], [p_promo_sk#130], Inner, BuildRight + +(133) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, p_promo_sk#130] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(134) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#131, hd_income_band_sk#132] + +(135) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_hdemo_sk#95], [hd_demo_sk#131], Inner, BuildRight + +(136) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] + +(137) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#133, hd_income_band_sk#134] + +(138) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] +Right output [2]: [hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [c_current_hdemo_sk#118], [hd_demo_sk#133], Inner, BuildRight + +(139) CometProject +Input [15]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] + +(140) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(141) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] +Right output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_addr_sk#96], [ca_address_sk#135], Inner, BuildRight + +(142) CometProject +Input [18]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(143) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(144) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Right output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [c_current_addr_sk#119], [ca_address_sk#140], Inner, BuildRight + +(145) CometProject +Input [21]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(146) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#145] + +(147) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#145] +Arguments: [hd_income_band_sk#132], [ib_income_band_sk#145], Inner, BuildRight + +(148) CometProject +Input [20]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#145] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(149) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#146] + +(150) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#146] +Arguments: [hd_income_band_sk#134], [ib_income_band_sk#146], Inner, BuildRight + +(151) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#146] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(152) ReusedExchange [Reuses operator id: 92] +Output [2]: [i_item_sk#147, i_product_name#148] + +(153) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [2]: [i_item_sk#147, i_product_name#148] +Arguments: [ss_item_sk#92], [i_item_sk#147], Inner, BuildRight + +(154) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Arguments: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148], [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] + +(155) CometHashAggregate +Input [18]: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#99)), partial_sum(UnscaledValue(ss_list_price#100)), partial_sum(UnscaledValue(ss_coupon_amt#101))] + +(156) CometHashAggregate +Input [19]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125, count#71, sum#149, sum#150, sum#151] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#99)), sum(UnscaledValue(ss_list_price#100)), sum(UnscaledValue(ss_coupon_amt#101))] + +(157) CometExchange +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: hashpartitioning(item_sk#152, store_name#153, store_zip#154, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(158) CometSort +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159], [item_sk#152 ASC NULLS FIRST, store_name#153 ASC NULLS FIRST, store_zip#154 ASC NULLS FIRST] + +(159) CometSortMergeJoin +Left output [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Right output [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#76, store_name#77, store_zip#78], [item_sk#152, store_name#153, store_zip#154], Inner, (cnt#156 <= cnt#88) + +(160) CometProject +Input [25]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + +(161) CometColumnarExchange +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: rangepartitioning(product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] + +(162) CometSort +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST] + +(163) ColumnarToRow [codegen id : 1] +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ef0fff5d7a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_datafusion/simplified.txt @@ -0,0 +1,165 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometColumnarExchange [product_name,store_name,cnt] #1 + CometProject [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometSortMergeJoin [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometSort [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #2 + CometHashAggregate [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometExchange [ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #4 + CometFilter [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + CometExchange [cs_item_sk] #5 + CometHashAggregate [cs_item_sk,sum,sum,isEmpty,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometProject [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_ext_list_price,cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSort [cs_item_sk,cs_order_number,cs_ext_list_price] + CometExchange [cs_item_sk,cs_order_number] #6 + CometProject [cs_item_sk,cs_order_number,cs_ext_list_price] + CometFilter [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometExchange [cr_item_sk,cr_order_number] #7 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #8 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #9 + CometFilter [s_store_sk,s_store_name,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #11 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk,d_year] #11 + CometBroadcastExchange [cd_demo_sk,cd_marital_status] #12 + CometFilter [cd_demo_sk,cd_marital_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status] + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + CometBroadcastExchange [p_promo_sk] #13 + CometFilter [p_promo_sk] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #14 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_income_band_sk] + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + CometBroadcastExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ib_income_band_sk] #13 + ReusedExchange [ib_income_band_sk] #13 + CometBroadcastExchange [i_item_sk,i_product_name] #16 + CometProject [i_item_sk,i_product_name] + CometFilter [i_item_sk,i_current_price,i_color,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_color,i_product_name] + CometSort [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #17 + CometHashAggregate [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + CometBroadcastExchange [d_date_sk,d_year] #18 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [p_promo_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ib_income_band_sk] #13 + ReusedExchange [ib_income_band_sk] #13 + ReusedExchange [i_item_sk,i_product_name] #16 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..121e15e05c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/explain.txt @@ -0,0 +1,894 @@ +== Physical Plan == +* ColumnarToRow (165) ++- CometSort (164) + +- CometColumnarExchange (163) + +- CometProject (162) + +- CometSortMergeJoin (161) + :- CometSort (100) + : +- CometExchange (99) + : +- CometHashAggregate (98) + : +- CometHashAggregate (97) + : +- CometProject (96) + : +- CometBroadcastHashJoin (95) + : :- CometProject (90) + : : +- CometBroadcastHashJoin (89) + : : :- CometProject (87) + : : : +- CometBroadcastHashJoin (86) + : : : :- CometProject (82) + : : : : +- CometBroadcastHashJoin (81) + : : : : :- CometProject (79) + : : : : : +- CometBroadcastHashJoin (78) + : : : : : :- CometProject (74) + : : : : : : +- CometBroadcastHashJoin (73) + : : : : : : :- CometProject (71) + : : : : : : : +- CometBroadcastHashJoin (70) + : : : : : : : :- CometProject (66) + : : : : : : : : +- CometBroadcastHashJoin (65) + : : : : : : : : :- CometProject (61) + : : : : : : : : : +- CometBroadcastHashJoin (60) + : : : : : : : : : :- CometProject (58) + : : : : : : : : : : +- CometBroadcastHashJoin (57) + : : : : : : : : : : :- CometProject (53) + : : : : : : : : : : : +- CometBroadcastHashJoin (52) + : : : : : : : : : : : :- CometProject (50) + : : : : : : : : : : : : +- CometBroadcastHashJoin (49) + : : : : : : : : : : : : :- CometProject (45) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (44) + : : : : : : : : : : : : : :- CometProject (40) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (39) + : : : : : : : : : : : : : : :- CometProject (35) + : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (34) + : : : : : : : : : : : : : : : :- CometProject (30) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (29) + : : : : : : : : : : : : : : : : :- CometSort (10) + : : : : : : : : : : : : : : : : : +- CometExchange (9) + : : : : : : : : : : : : : : : : : +- CometProject (8) + : : : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : : : : : : : : : : : : :- CometBroadcastExchange (3) + : : : : : : : : : : : : : : : : : : +- CometFilter (2) + : : : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : : : : : : : : : : : : +- CometProject (6) + : : : : : : : : : : : : : : : : : +- CometFilter (5) + : : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.store_returns (4) + : : : : : : : : : : : : : : : : +- CometSort (28) + : : : : : : : : : : : : : : : : +- CometProject (27) + : : : : : : : : : : : : : : : : +- CometFilter (26) + : : : : : : : : : : : : : : : : +- CometHashAggregate (25) + : : : : : : : : : : : : : : : : +- CometExchange (24) + : : : : : : : : : : : : : : : : +- CometHashAggregate (23) + : : : : : : : : : : : : : : : : +- CometProject (22) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (21) + : : : : : : : : : : : : : : : : :- CometSort (15) + : : : : : : : : : : : : : : : : : +- CometExchange (14) + : : : : : : : : : : : : : : : : : +- CometProject (13) + : : : : : : : : : : : : : : : : : +- CometFilter (12) + : : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (11) + : : : : : : : : : : : : : : : : +- CometSort (20) + : : : : : : : : : : : : : : : : +- CometExchange (19) + : : : : : : : : : : : : : : : : +- CometProject (18) + : : : : : : : : : : : : : : : : +- CometFilter (17) + : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_returns (16) + : : : : : : : : : : : : : : : +- CometBroadcastExchange (33) + : : : : : : : : : : : : : : : +- CometFilter (32) + : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.date_dim (31) + : : : : : : : : : : : : : : +- CometBroadcastExchange (38) + : : : : : : : : : : : : : : +- CometFilter (37) + : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.store (36) + : : : : : : : : : : : : : +- CometBroadcastExchange (43) + : : : : : : : : : : : : : +- CometFilter (42) + : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.customer (41) + : : : : : : : : : : : : +- CometBroadcastExchange (48) + : : : : : : : : : : : : +- CometFilter (47) + : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.date_dim (46) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- CometBroadcastExchange (56) + : : : : : : : : : : +- CometFilter (55) + : : : : : : : : : : +- CometScan parquet spark_catalog.default.customer_demographics (54) + : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : +- CometBroadcastExchange (64) + : : : : : : : : +- CometFilter (63) + : : : : : : : : +- CometScan parquet spark_catalog.default.promotion (62) + : : : : : : : +- CometBroadcastExchange (69) + : : : : : : : +- CometFilter (68) + : : : : : : : +- CometScan parquet spark_catalog.default.household_demographics (67) + : : : : : : +- ReusedExchange (72) + : : : : : +- CometBroadcastExchange (77) + : : : : : +- CometFilter (76) + : : : : : +- CometScan parquet spark_catalog.default.customer_address (75) + : : : : +- ReusedExchange (80) + : : : +- CometBroadcastExchange (85) + : : : +- CometFilter (84) + : : : +- CometScan parquet spark_catalog.default.income_band (83) + : : +- ReusedExchange (88) + : +- CometBroadcastExchange (94) + : +- CometProject (93) + : +- CometFilter (92) + : +- CometScan parquet spark_catalog.default.item (91) + +- CometSort (160) + +- CometExchange (159) + +- CometHashAggregate (158) + +- CometHashAggregate (157) + +- CometProject (156) + +- CometBroadcastHashJoin (155) + :- CometProject (153) + : +- CometBroadcastHashJoin (152) + : :- CometProject (150) + : : +- CometBroadcastHashJoin (149) + : : :- CometProject (147) + : : : +- CometBroadcastHashJoin (146) + : : : :- CometProject (144) + : : : : +- CometBroadcastHashJoin (143) + : : : : :- CometProject (141) + : : : : : +- CometBroadcastHashJoin (140) + : : : : : :- CometProject (138) + : : : : : : +- CometBroadcastHashJoin (137) + : : : : : : :- CometProject (135) + : : : : : : : +- CometBroadcastHashJoin (134) + : : : : : : : :- CometProject (132) + : : : : : : : : +- CometBroadcastHashJoin (131) + : : : : : : : : :- CometProject (129) + : : : : : : : : : +- CometBroadcastHashJoin (128) + : : : : : : : : : :- CometProject (126) + : : : : : : : : : : +- CometBroadcastHashJoin (125) + : : : : : : : : : : :- CometProject (123) + : : : : : : : : : : : +- CometBroadcastHashJoin (122) + : : : : : : : : : : : :- CometProject (120) + : : : : : : : : : : : : +- CometBroadcastHashJoin (119) + : : : : : : : : : : : : :- CometProject (117) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (116) + : : : : : : : : : : : : : :- CometProject (114) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (113) + : : : : : : : : : : : : : : :- CometProject (109) + : : : : : : : : : : : : : : : +- CometSortMergeJoin (108) + : : : : : : : : : : : : : : : :- CometSort (102) + : : : : : : : : : : : : : : : : +- ReusedExchange (101) + : : : : : : : : : : : : : : : +- CometSort (107) + : : : : : : : : : : : : : : : +- CometProject (106) + : : : : : : : : : : : : : : : +- CometFilter (105) + : : : : : : : : : : : : : : : +- CometHashAggregate (104) + : : : : : : : : : : : : : : : +- ReusedExchange (103) + : : : : : : : : : : : : : : +- CometBroadcastExchange (112) + : : : : : : : : : : : : : : +- CometFilter (111) + : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.date_dim (110) + : : : : : : : : : : : : : +- ReusedExchange (115) + : : : : : : : : : : : : +- ReusedExchange (118) + : : : : : : : : : : : +- ReusedExchange (121) + : : : : : : : : : : +- ReusedExchange (124) + : : : : : : : : : +- ReusedExchange (127) + : : : : : : : : +- ReusedExchange (130) + : : : : : : : +- ReusedExchange (133) + : : : : : : +- ReusedExchange (136) + : : : : : +- ReusedExchange (139) + : : : : +- ReusedExchange (142) + : : : +- ReusedExchange (145) + : : +- ReusedExchange (148) + : +- ReusedExchange (151) + +- ReusedExchange (154) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) CometFilter +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(3) CometBroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(4) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(6) CometProject +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14], [sr_item_sk#13, sr_ticket_number#14] + +(7) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_ticket_number#8], [sr_item_sk#13, sr_ticket_number#14], Inner, BuildLeft + +(8) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(9) CometExchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometSort +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1 ASC NULLS FIRST] + +(11) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(12) CometFilter +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) CometProject +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(14) CometExchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(15) CometSort +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST] + +(16) CometScan parquet spark_catalog.default.catalog_returns +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(17) CometFilter +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(18) CometProject +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(19) CometExchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Right output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_order_number#17], [cr_item_sk#20, cr_order_number#21], Inner + +(22) CometProject +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(23) CometHashAggregate +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(24) CometExchange +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(25) CometHashAggregate +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(26) CometFilter +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(27) CometProject +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Arguments: [cs_item_sk#16], [cs_item_sk#16] + +(28) CometSort +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16], [cs_item_sk#16 ASC NULLS FIRST] + +(29) CometSortMergeJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [1]: [cs_item_sk#16] +Arguments: [ss_item_sk#1], [cs_item_sk#16], Inner + +(30) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(31) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#31, d_year#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(32) CometFilter +Input [2]: [d_date_sk#31, d_year#32] +Condition : ((isnotnull(d_year#32) AND (d_year#32 = 1999)) AND isnotnull(d_date_sk#31)) + +(33) CometBroadcastExchange +Input [2]: [d_date_sk#31, d_year#32] +Arguments: [d_date_sk#31, d_year#32] + +(34) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#31, d_year#32] +Arguments: [ss_sold_date_sk#12], [d_date_sk#31], Inner, BuildRight + +(35) CometProject +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#31, d_year#32] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] + +(36) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(37) CometFilter +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Condition : ((isnotnull(s_store_sk#33) AND isnotnull(s_store_name#34)) AND isnotnull(s_zip#35)) + +(38) CometBroadcastExchange +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [s_store_sk#33, s_store_name#34, s_zip#35] + +(39) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] +Right output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_store_sk#6], [s_store_sk#33], Inner, BuildRight + +(40) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] + +(41) CometScan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(42) CometFilter +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Condition : (((((isnotnull(c_customer_sk#36) AND isnotnull(c_first_sales_date_sk#41)) AND isnotnull(c_first_shipto_date_sk#40)) AND isnotnull(c_current_cdemo_sk#37)) AND isnotnull(c_current_hdemo_sk#38)) AND isnotnull(c_current_addr_sk#39)) + +(43) CometBroadcastExchange +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(44) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] +Right output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_customer_sk#2], [c_customer_sk#36], Inner, BuildRight + +(45) CometProject +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(46) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#42, d_year#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(47) CometFilter +Input [2]: [d_date_sk#42, d_year#43] +Condition : isnotnull(d_date_sk#42) + +(48) CometBroadcastExchange +Input [2]: [d_date_sk#42, d_year#43] +Arguments: [d_date_sk#42, d_year#43] + +(49) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Right output [2]: [d_date_sk#42, d_year#43] +Arguments: [c_first_sales_date_sk#41], [d_date_sk#42], Inner, BuildRight + +(50) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41, d_date_sk#42, d_year#43] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#44, d_year#45] + +(52) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] +Right output [2]: [d_date_sk#44, d_year#45] +Arguments: [c_first_shipto_date_sk#40], [d_date_sk#44], Inner, BuildRight + +(53) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43, d_date_sk#44, d_year#45] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(54) CometScan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#46, cd_marital_status#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(55) CometFilter +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Condition : (isnotnull(cd_demo_sk#46) AND isnotnull(cd_marital_status#47)) + +(56) CometBroadcastExchange +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [cd_demo_sk#46, cd_marital_status#47] + +(57) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_cdemo_sk#3], [cd_demo_sk#46], Inner, BuildRight + +(58) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#48, cd_marital_status#49] + +(60) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] +Right output [2]: [cd_demo_sk#48, cd_marital_status#49] +Arguments: [c_current_cdemo_sk#37], [cd_demo_sk#48], Inner, NOT (cd_marital_status#47 = cd_marital_status#49), BuildRight + +(61) CometProject +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47, cd_demo_sk#48, cd_marital_status#49] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(62) CometScan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(63) CometFilter +Input [1]: [p_promo_sk#50] +Condition : isnotnull(p_promo_sk#50) + +(64) CometBroadcastExchange +Input [1]: [p_promo_sk#50] +Arguments: [p_promo_sk#50] + +(65) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [1]: [p_promo_sk#50] +Arguments: [ss_promo_sk#7], [p_promo_sk#50], Inner, BuildRight + +(66) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, p_promo_sk#50] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(67) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(68) CometFilter +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Condition : (isnotnull(hd_demo_sk#51) AND isnotnull(hd_income_band_sk#52)) + +(69) CometBroadcastExchange +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [hd_demo_sk#51, hd_income_band_sk#52] + +(70) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_hdemo_sk#4], [hd_demo_sk#51], Inner, BuildRight + +(71) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] + +(72) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#53, hd_income_band_sk#54] + +(73) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] +Right output [2]: [hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [c_current_hdemo_sk#38], [hd_demo_sk#53], Inner, BuildRight + +(74) CometProject +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] + +(75) CometScan parquet spark_catalog.default.customer_address +Output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(76) CometFilter +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Condition : isnotnull(ca_address_sk#55) + +(77) CometBroadcastExchange +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(78) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] +Right output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_addr_sk#5], [ca_address_sk#55], Inner, BuildRight + +(79) CometProject +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(80) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(81) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Right output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [c_current_addr_sk#39], [ca_address_sk#60], Inner, BuildRight + +(82) CometProject +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(83) CometScan parquet spark_catalog.default.income_band +Output [1]: [ib_income_band_sk#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(84) CometFilter +Input [1]: [ib_income_band_sk#65] +Condition : isnotnull(ib_income_band_sk#65) + +(85) CometBroadcastExchange +Input [1]: [ib_income_band_sk#65] +Arguments: [ib_income_band_sk#65] + +(86) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#65] +Arguments: [hd_income_band_sk#52], [ib_income_band_sk#65], Inner, BuildRight + +(87) CometProject +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#65] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(88) ReusedExchange [Reuses operator id: 85] +Output [1]: [ib_income_band_sk#66] + +(89) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#66] +Arguments: [hd_income_band_sk#54], [ib_income_band_sk#66], Inner, BuildRight + +(90) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#66] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(91) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(92) CometFilter +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Condition : ((((((isnotnull(i_current_price#68) AND i_color#69 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#68 >= 64.00)) AND (i_current_price#68 <= 74.00)) AND (i_current_price#68 >= 65.00)) AND (i_current_price#68 <= 79.00)) AND isnotnull(i_item_sk#67)) + +(93) CometProject +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70], [i_item_sk#67, i_product_name#70] + +(94) CometBroadcastExchange +Input [2]: [i_item_sk#67, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70] + +(95) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [2]: [i_item_sk#67, i_product_name#70] +Arguments: [ss_item_sk#1], [i_item_sk#67], Inner, BuildRight + +(96) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Arguments: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70], [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] + +(97) CometHashAggregate +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] + +(98) CometHashAggregate +Input [19]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45, count#71, sum#72, sum#73, sum#74] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] + +(99) CometExchange +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: hashpartitioning(item_sk#76, store_name#77, store_zip#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(100) CometSort +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91], [item_sk#76 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, store_zip#78 ASC NULLS FIRST] + +(101) ReusedExchange [Reuses operator id: 9] +Output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(102) CometSort +Input [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92 ASC NULLS FIRST] + +(103) ReusedExchange [Reuses operator id: 24] +Output [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] + +(104) CometHashAggregate +Input [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] +Keys [1]: [cs_item_sk#103] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#107)), sum(((cr_refunded_cash#108 + cr_reversed_charge#109) + cr_store_credit#110))] + +(105) CometFilter +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(106) CometProject +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Arguments: [cs_item_sk#103], [cs_item_sk#103] + +(107) CometSort +Input [1]: [cs_item_sk#103] +Arguments: [cs_item_sk#103], [cs_item_sk#103 ASC NULLS FIRST] + +(108) CometSortMergeJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [1]: [cs_item_sk#103] +Arguments: [ss_item_sk#92], [cs_item_sk#103], Inner + +(109) CometProject +Input [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, cs_item_sk#103] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(110) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#111, d_year#112] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(111) CometFilter +Input [2]: [d_date_sk#111, d_year#112] +Condition : ((isnotnull(d_year#112) AND (d_year#112 = 2000)) AND isnotnull(d_date_sk#111)) + +(112) CometBroadcastExchange +Input [2]: [d_date_sk#111, d_year#112] +Arguments: [d_date_sk#111, d_year#112] + +(113) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [2]: [d_date_sk#111, d_year#112] +Arguments: [ss_sold_date_sk#102], [d_date_sk#111], Inner, BuildRight + +(114) CometProject +Input [13]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, d_date_sk#111, d_year#112] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] + +(115) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] + +(116) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] +Right output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_store_sk#97], [s_store_sk#113], Inner, BuildRight + +(117) CometProject +Input [14]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] + +(118) ReusedExchange [Reuses operator id: 43] +Output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(119) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] +Right output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_customer_sk#93], [c_customer_sk#116], Inner, BuildRight + +(120) CometProject +Input [18]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(121) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#122, d_year#123] + +(122) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Right output [2]: [d_date_sk#122, d_year#123] +Arguments: [c_first_sales_date_sk#121], [d_date_sk#122], Inner, BuildRight + +(123) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121, d_date_sk#122, d_year#123] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] + +(124) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#124, d_year#125] + +(125) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] +Right output [2]: [d_date_sk#124, d_year#125] +Arguments: [c_first_shipto_date_sk#120], [d_date_sk#124], Inner, BuildRight + +(126) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123, d_date_sk#124, d_year#125] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(127) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#126, cd_marital_status#127] + +(128) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_cdemo_sk#94], [cd_demo_sk#126], Inner, BuildRight + +(129) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] + +(130) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#128, cd_marital_status#129] + +(131) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] +Right output [2]: [cd_demo_sk#128, cd_marital_status#129] +Arguments: [c_current_cdemo_sk#117], [cd_demo_sk#128], Inner, NOT (cd_marital_status#127 = cd_marital_status#129), BuildRight + +(132) CometProject +Input [18]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127, cd_demo_sk#128, cd_marital_status#129] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(133) ReusedExchange [Reuses operator id: 64] +Output [1]: [p_promo_sk#130] + +(134) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [1]: [p_promo_sk#130] +Arguments: [ss_promo_sk#98], [p_promo_sk#130], Inner, BuildRight + +(135) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, p_promo_sk#130] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(136) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#131, hd_income_band_sk#132] + +(137) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_hdemo_sk#95], [hd_demo_sk#131], Inner, BuildRight + +(138) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] + +(139) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#133, hd_income_band_sk#134] + +(140) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] +Right output [2]: [hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [c_current_hdemo_sk#118], [hd_demo_sk#133], Inner, BuildRight + +(141) CometProject +Input [15]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] + +(142) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(143) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] +Right output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_addr_sk#96], [ca_address_sk#135], Inner, BuildRight + +(144) CometProject +Input [18]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(145) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(146) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Right output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [c_current_addr_sk#119], [ca_address_sk#140], Inner, BuildRight + +(147) CometProject +Input [21]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(148) ReusedExchange [Reuses operator id: 85] +Output [1]: [ib_income_band_sk#145] + +(149) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#145] +Arguments: [hd_income_band_sk#132], [ib_income_band_sk#145], Inner, BuildRight + +(150) CometProject +Input [20]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#145] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(151) ReusedExchange [Reuses operator id: 85] +Output [1]: [ib_income_band_sk#146] + +(152) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#146] +Arguments: [hd_income_band_sk#134], [ib_income_band_sk#146], Inner, BuildRight + +(153) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#146] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(154) ReusedExchange [Reuses operator id: 94] +Output [2]: [i_item_sk#147, i_product_name#148] + +(155) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [2]: [i_item_sk#147, i_product_name#148] +Arguments: [ss_item_sk#92], [i_item_sk#147], Inner, BuildRight + +(156) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Arguments: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148], [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] + +(157) CometHashAggregate +Input [18]: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#99)), partial_sum(UnscaledValue(ss_list_price#100)), partial_sum(UnscaledValue(ss_coupon_amt#101))] + +(158) CometHashAggregate +Input [19]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125, count#71, sum#149, sum#150, sum#151] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#99)), sum(UnscaledValue(ss_list_price#100)), sum(UnscaledValue(ss_coupon_amt#101))] + +(159) CometExchange +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: hashpartitioning(item_sk#152, store_name#153, store_zip#154, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(160) CometSort +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159], [item_sk#152 ASC NULLS FIRST, store_name#153 ASC NULLS FIRST, store_zip#154 ASC NULLS FIRST] + +(161) CometSortMergeJoin +Left output [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Right output [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#76, store_name#77, store_zip#78], [item_sk#152, store_name#153, store_zip#154], Inner, (cnt#156 <= cnt#88) + +(162) CometProject +Input [25]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + +(163) CometColumnarExchange +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: rangepartitioning(product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] + +(164) CometSort +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST] + +(165) ColumnarToRow [codegen id : 1] +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7434d13f17 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q64.native_iceberg_compat/simplified.txt @@ -0,0 +1,167 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometColumnarExchange [product_name,store_name,cnt] #1 + CometProject [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometSortMergeJoin [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometSort [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #2 + CometHashAggregate [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometExchange [ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #4 + CometFilter [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + CometExchange [cs_item_sk] #5 + CometHashAggregate [cs_item_sk,sum,sum,isEmpty,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometProject [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_ext_list_price,cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSort [cs_item_sk,cs_order_number,cs_ext_list_price] + CometExchange [cs_item_sk,cs_order_number] #6 + CometProject [cs_item_sk,cs_order_number,cs_ext_list_price] + CometFilter [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometExchange [cr_item_sk,cr_order_number] #7 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #8 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #9 + CometFilter [s_store_sk,s_store_name,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #11 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [d_date_sk,d_year] #11 + CometBroadcastExchange [cd_demo_sk,cd_marital_status] #12 + CometFilter [cd_demo_sk,cd_marital_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + CometBroadcastExchange [p_promo_sk] #13 + CometFilter [p_promo_sk] + CometScan parquet spark_catalog.default.promotion [p_promo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #14 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + CometBroadcastExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + CometBroadcastExchange [ib_income_band_sk] #16 + CometFilter [ib_income_band_sk] + CometScan parquet spark_catalog.default.income_band [ib_income_band_sk] + ReusedExchange [ib_income_band_sk] #16 + CometBroadcastExchange [i_item_sk,i_product_name] #17 + CometProject [i_item_sk,i_product_name] + CometFilter [i_item_sk,i_current_price,i_color,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_color,i_product_name] + CometSort [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #18 + CometHashAggregate [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + CometBroadcastExchange [d_date_sk,d_year] #19 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [p_promo_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ib_income_band_sk] #16 + ReusedExchange [ib_income_band_sk] #16 + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/explain.txt new file mode 100644 index 0000000000..329ce1b5f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/explain.txt @@ -0,0 +1,207 @@ +== Physical Plan == +* ColumnarToRow (39) ++- CometTakeOrderedAndProject (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (1) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometHashAggregate (13) + : : +- CometExchange (12) + : : +- CometHashAggregate (11) + : : +- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : +- CometBroadcastExchange (8) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (5) + : +- CometBroadcastExchange (20) + : +- CometFilter (19) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (18) + +- CometBroadcastExchange (35) + +- CometFilter (34) + +- CometHashAggregate (33) + +- CometExchange (32) + +- CometHashAggregate (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (23) + +- ReusedExchange (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#1, s_store_name#2] +Arguments: [s_store_sk#1, s_store_name#2] + +(2) CometFilter +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(4) CometFilter +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : (isnotnull(ss_store_sk#4) AND isnotnull(ss_item_sk#3)) + +(5) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#7, d_month_seq#8] +Arguments: [d_date_sk#7, d_month_seq#8] + +(6) CometFilter +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(7) CometProject +Input [2]: [d_date_sk#7, d_month_seq#8] +Arguments: [d_date_sk#7], [d_date_sk#7] + +(8) CometBroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: [d_date_sk#7] + +(9) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [d_date_sk#7] +Arguments: [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + +(10) CometProject +Input [5]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6, d_date_sk#7] +Arguments: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5], [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] + +(11) CometHashAggregate +Input [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#5))] + +(12) CometExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#9] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(13) CometHashAggregate +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#9] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] + +(14) CometFilter +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#10] +Condition : isnotnull(revenue#10) + +(15) CometBroadcastExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#10] +Arguments: [ss_store_sk#4, ss_item_sk#3, revenue#10] + +(16) CometBroadcastHashJoin +Left output [2]: [s_store_sk#1, s_store_name#2] +Right output [3]: [ss_store_sk#4, ss_item_sk#3, revenue#10] +Arguments: [s_store_sk#1], [ss_store_sk#4], Inner, BuildRight + +(17) CometProject +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10] +Arguments: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10], [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10] + +(18) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(19) CometFilter +Input [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Condition : isnotnull(i_item_sk#11) + +(20) CometBroadcastExchange +Input [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(21) CometBroadcastHashJoin +Left output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10] +Right output [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [ss_item_sk#3], [i_item_sk#11], Inner, BuildRight + +(22) CometProject +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10, i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15], [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(23) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] +Arguments: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] + +(24) CometFilter +Input [4]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] +Condition : isnotnull(ss_store_sk#17) + +(25) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#20] + +(26) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] +Right output [1]: [d_date_sk#20] +Arguments: [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + +(27) CometProject +Input [5]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19, d_date_sk#20] +Arguments: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18], [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18] + +(28) CometHashAggregate +Input [3]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18] +Keys [2]: [ss_store_sk#17, ss_item_sk#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#18))] + +(29) CometExchange +Input [3]: [ss_store_sk#17, ss_item_sk#16, sum#21] +Arguments: hashpartitioning(ss_store_sk#17, ss_item_sk#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(30) CometHashAggregate +Input [3]: [ss_store_sk#17, ss_item_sk#16, sum#21] +Keys [2]: [ss_store_sk#17, ss_item_sk#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#18))] + +(31) CometHashAggregate +Input [2]: [ss_store_sk#17, revenue#22] +Keys [1]: [ss_store_sk#17] +Functions [1]: [partial_avg(revenue#22)] + +(32) CometExchange +Input [3]: [ss_store_sk#17, sum#23, count#24] +Arguments: hashpartitioning(ss_store_sk#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(33) CometHashAggregate +Input [3]: [ss_store_sk#17, sum#23, count#24] +Keys [1]: [ss_store_sk#17] +Functions [1]: [avg(revenue#22)] + +(34) CometFilter +Input [2]: [ss_store_sk#17, ave#25] +Condition : isnotnull(ave#25) + +(35) CometBroadcastExchange +Input [2]: [ss_store_sk#17, ave#25] +Arguments: [ss_store_sk#17, ave#25] + +(36) CometBroadcastHashJoin +Left output [7]: [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Right output [2]: [ss_store_sk#17, ave#25] +Arguments: [ss_store_sk#4], [ss_store_sk#17], Inner, (cast(revenue#10 as decimal(23,7)) <= (0.1 * ave#25)), BuildRight + +(37) CometProject +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15, ss_store_sk#17, ave#25] +Arguments: [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15], [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(38) CometTakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#2 ASC NULLS FIRST,i_item_desc#12 ASC NULLS FIRST], output=[s_store_name#2,i_item_desc#12,revenue#10,i_current_price#13,i_wholesale_cost#14,i_brand#15]), [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15], 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#12 ASC NULLS FIRST], [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(39) ColumnarToRow [codegen id : 1] +Input [6]: [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0b9c656364 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_datafusion/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + CometProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + CometBroadcastHashJoin [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand,ss_store_sk,ave] + CometProject [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometBroadcastHashJoin [s_store_name,ss_store_sk,ss_item_sk,revenue,i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometProject [s_store_name,ss_store_sk,ss_item_sk,revenue] + CometBroadcastHashJoin [s_store_sk,s_store_name,ss_store_sk,ss_item_sk,revenue] + CometFilter [s_store_sk,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name] + CometBroadcastExchange [ss_store_sk,ss_item_sk,revenue] #1 + CometFilter [ss_store_sk,ss_item_sk,revenue] + CometHashAggregate [ss_store_sk,ss_item_sk,revenue,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [ss_store_sk,ss_item_sk] #2 + CometHashAggregate [ss_store_sk,ss_item_sk,sum,ss_sales_price] + CometProject [ss_item_sk,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] #4 + CometFilter [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometBroadcastExchange [ss_store_sk,ave] #5 + CometFilter [ss_store_sk,ave] + CometHashAggregate [ss_store_sk,ave,sum,count,avg(revenue)] + CometExchange [ss_store_sk] #6 + CometHashAggregate [ss_store_sk,sum,count,revenue] + CometHashAggregate [ss_store_sk,revenue,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [ss_store_sk,ss_item_sk] #7 + CometHashAggregate [ss_store_sk,ss_item_sk,sum,ss_sales_price] + CometProject [ss_item_sk,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..df794c5932 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/explain.txt @@ -0,0 +1,224 @@ +== Physical Plan == +* ColumnarToRow (39) ++- CometTakeOrderedAndProject (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store (1) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometHashAggregate (13) + : : +- CometExchange (12) + : : +- CometHashAggregate (11) + : : +- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : +- CometBroadcastExchange (8) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometScan parquet spark_catalog.default.date_dim (5) + : +- CometBroadcastExchange (20) + : +- CometFilter (19) + : +- CometScan parquet spark_catalog.default.item (18) + +- CometBroadcastExchange (35) + +- CometFilter (34) + +- CometHashAggregate (33) + +- CometExchange (32) + +- CometHashAggregate (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometFilter (24) + : +- CometScan parquet spark_catalog.default.store_sales (23) + +- ReusedExchange (25) + + +(1) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#1, s_store_name#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : (isnotnull(ss_store_sk#4) AND isnotnull(ss_item_sk#3)) + +(5) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] +ReadSchema: struct + +(6) CometFilter +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(7) CometProject +Input [2]: [d_date_sk#7, d_month_seq#8] +Arguments: [d_date_sk#7], [d_date_sk#7] + +(8) CometBroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: [d_date_sk#7] + +(9) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [d_date_sk#7] +Arguments: [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + +(10) CometProject +Input [5]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6, d_date_sk#7] +Arguments: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5], [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] + +(11) CometHashAggregate +Input [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#5))] + +(12) CometExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#9] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(13) CometHashAggregate +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#9] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] + +(14) CometFilter +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#10] +Condition : isnotnull(revenue#10) + +(15) CometBroadcastExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#10] +Arguments: [ss_store_sk#4, ss_item_sk#3, revenue#10] + +(16) CometBroadcastHashJoin +Left output [2]: [s_store_sk#1, s_store_name#2] +Right output [3]: [ss_store_sk#4, ss_item_sk#3, revenue#10] +Arguments: [s_store_sk#1], [ss_store_sk#4], Inner, BuildRight + +(17) CometProject +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10] +Arguments: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10], [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10] + +(18) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) CometFilter +Input [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Condition : isnotnull(i_item_sk#11) + +(20) CometBroadcastExchange +Input [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(21) CometBroadcastHashJoin +Left output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10] +Right output [5]: [i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [ss_item_sk#3], [i_item_sk#11], Inner, BuildRight + +(22) CometProject +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#10, i_item_sk#11, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15], [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(23) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#19)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(24) CometFilter +Input [4]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] +Condition : isnotnull(ss_store_sk#17) + +(25) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#20] + +(26) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19] +Right output [1]: [d_date_sk#20] +Arguments: [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + +(27) CometProject +Input [5]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18, ss_sold_date_sk#19, d_date_sk#20] +Arguments: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18], [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18] + +(28) CometHashAggregate +Input [3]: [ss_item_sk#16, ss_store_sk#17, ss_sales_price#18] +Keys [2]: [ss_store_sk#17, ss_item_sk#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#18))] + +(29) CometExchange +Input [3]: [ss_store_sk#17, ss_item_sk#16, sum#21] +Arguments: hashpartitioning(ss_store_sk#17, ss_item_sk#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(30) CometHashAggregate +Input [3]: [ss_store_sk#17, ss_item_sk#16, sum#21] +Keys [2]: [ss_store_sk#17, ss_item_sk#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#18))] + +(31) CometHashAggregate +Input [2]: [ss_store_sk#17, revenue#22] +Keys [1]: [ss_store_sk#17] +Functions [1]: [partial_avg(revenue#22)] + +(32) CometExchange +Input [3]: [ss_store_sk#17, sum#23, count#24] +Arguments: hashpartitioning(ss_store_sk#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(33) CometHashAggregate +Input [3]: [ss_store_sk#17, sum#23, count#24] +Keys [1]: [ss_store_sk#17] +Functions [1]: [avg(revenue#22)] + +(34) CometFilter +Input [2]: [ss_store_sk#17, ave#25] +Condition : isnotnull(ave#25) + +(35) CometBroadcastExchange +Input [2]: [ss_store_sk#17, ave#25] +Arguments: [ss_store_sk#17, ave#25] + +(36) CometBroadcastHashJoin +Left output [7]: [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Right output [2]: [ss_store_sk#17, ave#25] +Arguments: [ss_store_sk#4], [ss_store_sk#17], Inner, (cast(revenue#10 as decimal(23,7)) <= (0.1 * ave#25)), BuildRight + +(37) CometProject +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#10, i_item_desc#12, i_current_price#13, i_wholesale_cost#14, i_brand#15, ss_store_sk#17, ave#25] +Arguments: [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15], [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(38) CometTakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#2 ASC NULLS FIRST,i_item_desc#12 ASC NULLS FIRST], output=[s_store_name#2,i_item_desc#12,revenue#10,i_current_price#13,i_wholesale_cost#14,i_brand#15]), [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15], 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#12 ASC NULLS FIRST], [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] + +(39) ColumnarToRow [codegen id : 1] +Input [6]: [s_store_name#2, i_item_desc#12, revenue#10, i_current_price#13, i_wholesale_cost#14, i_brand#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d40005157f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q65.native_iceberg_compat/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + CometProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + CometBroadcastHashJoin [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand,ss_store_sk,ave] + CometProject [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometBroadcastHashJoin [s_store_name,ss_store_sk,ss_item_sk,revenue,i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometProject [s_store_name,ss_store_sk,ss_item_sk,revenue] + CometBroadcastHashJoin [s_store_sk,s_store_name,ss_store_sk,ss_item_sk,revenue] + CometFilter [s_store_sk,s_store_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name] + CometBroadcastExchange [ss_store_sk,ss_item_sk,revenue] #1 + CometFilter [ss_store_sk,ss_item_sk,revenue] + CometHashAggregate [ss_store_sk,ss_item_sk,revenue,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [ss_store_sk,ss_item_sk] #2 + CometHashAggregate [ss_store_sk,ss_item_sk,sum,ss_sales_price] + CometProject [ss_item_sk,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] #4 + CometFilter [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometBroadcastExchange [ss_store_sk,ave] #5 + CometFilter [ss_store_sk,ave] + CometHashAggregate [ss_store_sk,ave,sum,count,avg(revenue)] + CometExchange [ss_store_sk] #6 + CometHashAggregate [ss_store_sk,sum,count,revenue] + CometHashAggregate [ss_store_sk,revenue,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [ss_store_sk,ss_item_sk] #7 + CometHashAggregate [ss_store_sk,ss_item_sk,sum,ss_sales_price] + CometProject [ss_item_sk,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/explain.txt new file mode 100644 index 0000000000..83806709cd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/explain.txt @@ -0,0 +1,185 @@ +== Physical Plan == +* ColumnarToRow (35) ++- CometTakeOrderedAndProject (34) + +- CometHashAggregate (33) + +- CometExchange (32) + +- CometHashAggregate (31) + +- CometUnion (30) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (13) + : +- CometBroadcastExchange (22) + : +- CometProject (21) + : +- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`ship_mode` (19) + +- CometHashAggregate (29) + +- ReusedExchange (28) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_warehouse_sk#3) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_ship_mode_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(4) CometFilter +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(5) CometBroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) CometBroadcastHashJoin +Left output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Right output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [ws_warehouse_sk#3], [w_warehouse_sk#8], Inner, BuildRight + +(7) CometProject +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14], [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [d_date_sk#15, d_year#16, d_moy#17] + +(9) CometFilter +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [d_date_sk#15, d_year#16, d_moy#17] + +(11) CometBroadcastHashJoin +Left output [12]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Right output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [ws_sold_date_sk#7], [d_date_sk#15], Inner, BuildRight + +(12) CometProject +Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#15, d_year#16, d_moy#17] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(13) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [2]: [t_time_sk#18, t_time#19] +Arguments: [t_time_sk#18, t_time#19] + +(14) CometFilter +Input [2]: [t_time_sk#18, t_time#19] +Condition : (((isnotnull(t_time#19) AND (t_time#19 >= 30838)) AND (t_time#19 <= 59638)) AND isnotnull(t_time_sk#18)) + +(15) CometProject +Input [2]: [t_time_sk#18, t_time#19] +Arguments: [t_time_sk#18], [t_time_sk#18] + +(16) CometBroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: [t_time_sk#18] + +(17) CometBroadcastHashJoin +Left output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Right output [1]: [t_time_sk#18] +Arguments: [ws_sold_time_sk#1], [t_time_sk#18], Inner, BuildRight + +(18) CometProject +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, t_time_sk#18] +Arguments: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(19) CometNativeScan: `spark_catalog`.`default`.`ship_mode` +Output [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Arguments: [sm_ship_mode_sk#20, sm_carrier#21] + +(20) CometFilter +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Condition : (sm_carrier#21 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#20)) + +(21) CometProject +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Arguments: [sm_ship_mode_sk#20], [sm_ship_mode_sk#20] + +(22) CometBroadcastExchange +Input [1]: [sm_ship_mode_sk#20] +Arguments: [sm_ship_mode_sk#20] + +(23) CometBroadcastHashJoin +Left output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Right output [1]: [sm_ship_mode_sk#20] +Arguments: [ws_ship_mode_sk#2], [sm_ship_mode_sk#20], Inner, BuildRight + +(24) CometProject +Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, sm_ship_mode_sk#20] +Arguments: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(25) CometHashAggregate +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] + +(26) CometExchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] + +(28) ReusedExchange [Reuses operator id: 26] +Output [55]: [w_warehouse_name#70, w_warehouse_sq_ft#71, w_city#72, w_county#73, w_state#74, w_country#75, d_year#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124] + +(29) CometHashAggregate +Input [55]: [w_warehouse_name#70, w_warehouse_sq_ft#71, w_city#72, w_county#73, w_state#74, w_country#75, d_year#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124] +Keys [7]: [w_warehouse_name#70, w_warehouse_sq_ft#71, w_city#72, w_county#73, w_state#74, w_country#75, d_year#76] +Functions [24]: [sum(CASE WHEN (d_moy#125 = 1) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 2) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 3) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 4) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 5) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 6) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 7) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 8) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 9) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 10) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 11) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 12) THEN (cs_sales_price#126 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 1) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 2) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 3) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 4) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 5) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 6) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 7) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 8) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 9) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 10) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 11) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#125 = 12) THEN (cs_net_paid_inc_tax#128 * cast(cs_quantity#127 as decimal(10,0))) ELSE 0.00 END)] + +(30) CometUnion +Child 0 Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, jan_sales#131, feb_sales#132, mar_sales#133, apr_sales#134, may_sales#135, jun_sales#136, jul_sales#137, aug_sales#138, sep_sales#139, oct_sales#140, nov_sales#141, dec_sales#142, jan_net#143, feb_net#144, mar_net#145, apr_net#146, may_net#147, jun_net#148, jul_net#149, aug_net#150, sep_net#151, oct_net#152, nov_net#153, dec_net#154] +Child 1 Input [32]: [w_warehouse_name#70, w_warehouse_sq_ft#71, w_city#72, w_county#73, w_state#74, w_country#75, ship_carriers#155, year#156, jan_sales#157, feb_sales#158, mar_sales#159, apr_sales#160, may_sales#161, jun_sales#162, jul_sales#163, aug_sales#164, sep_sales#165, oct_sales#166, nov_sales#167, dec_sales#168, jan_net#169, feb_net#170, mar_net#171, apr_net#172, may_net#173, jun_net#174, jul_net#175, aug_net#176, sep_net#177, oct_net#178, nov_net#179, dec_net#180] + +(31) CometHashAggregate +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, jan_sales#131, feb_sales#132, mar_sales#133, apr_sales#134, may_sales#135, jun_sales#136, jul_sales#137, aug_sales#138, sep_sales#139, oct_sales#140, nov_sales#141, dec_sales#142, jan_net#143, feb_net#144, mar_net#145, apr_net#146, may_net#147, jun_net#148, jul_net#149, aug_net#150, sep_net#151, oct_net#152, nov_net#153, dec_net#154] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130] +Functions [36]: [partial_sum(jan_sales#131), partial_sum(feb_sales#132), partial_sum(mar_sales#133), partial_sum(apr_sales#134), partial_sum(may_sales#135), partial_sum(jun_sales#136), partial_sum(jul_sales#137), partial_sum(aug_sales#138), partial_sum(sep_sales#139), partial_sum(oct_sales#140), partial_sum(nov_sales#141), partial_sum(dec_sales#142), partial_sum((jan_sales#131 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((feb_sales#132 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((mar_sales#133 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((apr_sales#134 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((may_sales#135 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jun_sales#136 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jul_sales#137 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((aug_sales#138 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((sep_sales#139 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((oct_sales#140 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((nov_sales#141 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((dec_sales#142 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum(jan_net#143), partial_sum(feb_net#144), partial_sum(mar_net#145), partial_sum(apr_net#146), partial_sum(may_net#147), partial_sum(jun_net#148), partial_sum(jul_net#149), partial_sum(aug_net#150), partial_sum(sep_net#151), partial_sum(oct_net#152), partial_sum(nov_net#153), partial_sum(dec_net#154)] + +(32) CometExchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228, sum#229, isEmpty#230, sum#231, isEmpty#232, sum#233, isEmpty#234, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(33) CometHashAggregate +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228, sum#229, isEmpty#230, sum#231, isEmpty#232, sum#233, isEmpty#234, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130] +Functions [36]: [sum(jan_sales#131), sum(feb_sales#132), sum(mar_sales#133), sum(apr_sales#134), sum(may_sales#135), sum(jun_sales#136), sum(jul_sales#137), sum(aug_sales#138), sum(sep_sales#139), sum(oct_sales#140), sum(nov_sales#141), sum(dec_sales#142), sum((jan_sales#131 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((feb_sales#132 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((mar_sales#133 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((apr_sales#134 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((may_sales#135 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jun_sales#136 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jul_sales#137 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((aug_sales#138 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((sep_sales#139 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((oct_sales#140 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((nov_sales#141 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((dec_sales#142 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum(jan_net#143), sum(feb_net#144), sum(mar_net#145), sum(apr_net#146), sum(may_net#147), sum(jun_net#148), sum(jul_net#149), sum(aug_net#150), sum(sep_net#151), sum(oct_net#152), sum(nov_net#153), sum(dec_net#154)] + +(34) CometTakeOrderedAndProject +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, jan_sales#253, feb_sales#254, mar_sales#255, apr_sales#256, may_sales#257, jun_sales#258, jul_sales#259, aug_sales#260, sep_sales#261, oct_sales#262, nov_sales#263, dec_sales#264, jan_sales_per_sq_foot#265, feb_sales_per_sq_foot#266, mar_sales_per_sq_foot#267, apr_sales_per_sq_foot#268, may_sales_per_sq_foot#269, jun_sales_per_sq_foot#270, jul_sales_per_sq_foot#271, aug_sales_per_sq_foot#272, sep_sales_per_sq_foot#273, oct_sales_per_sq_foot#274, nov_sales_per_sq_foot#275, dec_sales_per_sq_foot#276, jan_net#277, feb_net#278, mar_net#279, apr_net#280, may_net#281, jun_net#282, jul_net#283, aug_net#284, sep_net#285, oct_net#286, nov_net#287, dec_net#288] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#9 ASC NULLS FIRST], output=[w_warehouse_name#9,w_warehouse_sq_ft#10,w_city#11,w_county#12,w_state#13,w_country#14,ship_carriers#129,year#130,jan_sales#253,feb_sales#254,mar_sales#255,apr_sales#256,may_sales#257,jun_sales#258,jul_sales#259,aug_sales#260,sep_sales#261,oct_sales#262,nov_sales#263,dec_sales#264,jan_sales_per_sq_foot#265,feb_sales_per_sq_foot#266,mar_sales_per_sq_foot#267,apr_sales_per_sq_foot#268,may_sales_per_sq_foot#269,jun_sales_per_sq_foot#270,jul_sales_per_sq_foot#271,aug_sales_per_sq_foot#272,sep_sales_per_sq_foot#273,oct_sales_per_sq_foot#274,nov_sales_per_sq_foot#275,dec_sales_per_sq_foot#276,jan_net#277,feb_net#278,mar_net#279,apr_net#280,may_net#281,jun_net#282,jul_net#283,aug_net#284,sep_net#285,oct_net#286,nov_net#287,dec_net#288]), [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, jan_sales#253, feb_sales#254, mar_sales#255, apr_sales#256, may_sales#257, jun_sales#258, jul_sales#259, aug_sales#260, sep_sales#261, oct_sales#262, nov_sales#263, dec_sales#264, jan_sales_per_sq_foot#265, feb_sales_per_sq_foot#266, mar_sales_per_sq_foot#267, apr_sales_per_sq_foot#268, may_sales_per_sq_foot#269, jun_sales_per_sq_foot#270, jul_sales_per_sq_foot#271, aug_sales_per_sq_foot#272, sep_sales_per_sq_foot#273, oct_sales_per_sq_foot#274, nov_sales_per_sq_foot#275, dec_sales_per_sq_foot#276, jan_net#277, feb_net#278, mar_net#279, apr_net#280, may_net#281, jun_net#282, jul_net#283, aug_net#284, sep_net#285, oct_net#286, nov_net#287, dec_net#288], 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, jan_sales#253, feb_sales#254, mar_sales#255, apr_sales#256, may_sales#257, jun_sales#258, jul_sales#259, aug_sales#260, sep_sales#261, oct_sales#262, nov_sales#263, dec_sales#264, jan_sales_per_sq_foot#265, feb_sales_per_sq_foot#266, mar_sales_per_sq_foot#267, apr_sales_per_sq_foot#268, may_sales_per_sq_foot#269, jun_sales_per_sq_foot#270, jul_sales_per_sq_foot#271, aug_sales_per_sq_foot#272, sep_sales_per_sq_foot#273, oct_sales_per_sq_foot#274, nov_sales_per_sq_foot#275, dec_sales_per_sq_foot#276, jan_net#277, feb_net#278, mar_net#279, apr_net#280, may_net#281, jun_net#282, jul_net#283, aug_net#284, sep_net#285, oct_net#286, nov_net#287, dec_net#288] + +(35) ColumnarToRow [codegen id : 1] +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#129, year#130, jan_sales#253, feb_sales#254, mar_sales#255, apr_sales#256, may_sales#257, jun_sales#258, jul_sales#259, aug_sales#260, sep_sales#261, oct_sales#262, nov_sales#263, dec_sales#264, jan_sales_per_sq_foot#265, feb_sales_per_sq_foot#266, mar_sales_per_sq_foot#267, apr_sales_per_sq_foot#268, may_sales_per_sq_foot#269, jun_sales_per_sq_foot#270, jul_sales_per_sq_foot#271, aug_sales_per_sq_foot#272, sep_sales_per_sq_foot#273, oct_sales_per_sq_foot#274, nov_sales_per_sq_foot#275, dec_sales_per_sq_foot#276, jan_net#277, feb_net#278, mar_net#279, apr_net#280, may_net#281, jun_net#282, jul_net#283, aug_net#284, sep_net#285, oct_net#286, nov_net#287, dec_net#288] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3c072a2ea1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum((jan_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((feb_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((mar_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((apr_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((may_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jun_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jul_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((aug_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((sep_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((oct_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((nov_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((dec_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net)] + CometExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + CometUnion [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum(CASE WHEN (d_moy = 1) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END)] + CometExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,d_moy,ws_ext_sales_price,ws_quantity,ws_net_paid] + CometProject [ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,sm_ship_mode_sk] + CometProject [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,t_time_sk] + CometProject [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_date_sk,d_year,d_moy] + CometProject [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometFilter [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 + CometFilter [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [t_time_sk] #5 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_time] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_time] + CometBroadcastExchange [sm_ship_mode_sk] #6 + CometProject [sm_ship_mode_sk] + CometFilter [sm_ship_mode_sk,sm_carrier] + CometNativeScan: `spark_catalog`.`default`.`ship_mode` [sm_ship_mode_sk,sm_carrier] + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum(CASE WHEN (d_moy = 1) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END)] + ReusedExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..72cfc88324 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/explain.txt @@ -0,0 +1,282 @@ +== Physical Plan == +* ColumnarToRow (50) ++- CometTakeOrderedAndProject (49) + +- CometHashAggregate (48) + +- CometExchange (47) + +- CometHashAggregate (46) + +- CometUnion (45) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.warehouse (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.time_dim (13) + : +- CometBroadcastExchange (22) + : +- CometProject (21) + : +- CometFilter (20) + : +- CometScan parquet spark_catalog.default.ship_mode (19) + +- CometHashAggregate (44) + +- CometExchange (43) + +- CometHashAggregate (42) + +- CometProject (41) + +- CometBroadcastHashJoin (40) + :- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometFilter (29) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (28) + : : : +- ReusedExchange (30) + : : +- ReusedExchange (33) + : +- ReusedExchange (36) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#7)] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_warehouse_sk#3) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_ship_mode_sk#2)) + +(3) CometScan parquet spark_catalog.default.warehouse +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(4) CometFilter +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(5) CometBroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) CometBroadcastHashJoin +Left output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Right output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [ws_warehouse_sk#3], [w_warehouse_sk#8], Inner, BuildRight + +(7) CometProject +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14], [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [d_date_sk#15, d_year#16, d_moy#17] + +(11) CometBroadcastHashJoin +Left output [12]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Right output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [ws_sold_date_sk#7], [d_date_sk#15], Inner, BuildRight + +(12) CometProject +Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#15, d_year#16, d_moy#17] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(13) CometScan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#18, t_time#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [t_time_sk#18, t_time#19] +Condition : (((isnotnull(t_time#19) AND (t_time#19 >= 30838)) AND (t_time#19 <= 59638)) AND isnotnull(t_time_sk#18)) + +(15) CometProject +Input [2]: [t_time_sk#18, t_time#19] +Arguments: [t_time_sk#18], [t_time_sk#18] + +(16) CometBroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: [t_time_sk#18] + +(17) CometBroadcastHashJoin +Left output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Right output [1]: [t_time_sk#18] +Arguments: [ws_sold_time_sk#1], [t_time_sk#18], Inner, BuildRight + +(18) CometProject +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, t_time_sk#18] +Arguments: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(19) CometScan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [In(sm_carrier, [BARIAN ,DHL ]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(20) CometFilter +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Condition : (sm_carrier#21 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#20)) + +(21) CometProject +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Arguments: [sm_ship_mode_sk#20], [sm_ship_mode_sk#20] + +(22) CometBroadcastExchange +Input [1]: [sm_ship_mode_sk#20] +Arguments: [sm_ship_mode_sk#20] + +(23) CometBroadcastHashJoin +Left output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Right output [1]: [sm_ship_mode_sk#20] +Arguments: [ws_ship_mode_sk#2], [sm_ship_mode_sk#20], Inner, BuildRight + +(24) CometProject +Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, sm_ship_mode_sk#20] +Arguments: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(25) CometHashAggregate +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] + +(26) CometExchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] + +(28) CometScan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_warehouse_sk#72, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#76)] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] +ReadSchema: struct + +(29) CometFilter +Input [7]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_warehouse_sk#72, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76] +Condition : ((isnotnull(cs_warehouse_sk#72) AND isnotnull(cs_sold_time_sk#70)) AND isnotnull(cs_ship_mode_sk#71)) + +(30) ReusedExchange [Reuses operator id: 5] +Output [7]: [w_warehouse_sk#77, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83] + +(31) CometBroadcastHashJoin +Left output [7]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_warehouse_sk#72, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76] +Right output [7]: [w_warehouse_sk#77, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83] +Arguments: [cs_warehouse_sk#72], [w_warehouse_sk#77], Inner, BuildRight + +(32) CometProject +Input [14]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_warehouse_sk#72, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76, w_warehouse_sk#77, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83] +Arguments: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83], [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83] + +(33) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#84, d_year#85, d_moy#86] + +(34) CometBroadcastHashJoin +Left output [12]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83] +Right output [3]: [d_date_sk#84, d_year#85, d_moy#86] +Arguments: [cs_sold_date_sk#76], [d_date_sk#84], Inner, BuildRight + +(35) CometProject +Input [15]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, cs_sold_date_sk#76, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_date_sk#84, d_year#85, d_moy#86] +Arguments: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86], [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86] + +(36) ReusedExchange [Reuses operator id: 16] +Output [1]: [t_time_sk#87] + +(37) CometBroadcastHashJoin +Left output [13]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86] +Right output [1]: [t_time_sk#87] +Arguments: [cs_sold_time_sk#70], [t_time_sk#87], Inner, BuildRight + +(38) CometProject +Input [14]: [cs_sold_time_sk#70, cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86, t_time_sk#87] +Arguments: [cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86], [cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86] + +(39) ReusedExchange [Reuses operator id: 22] +Output [1]: [sm_ship_mode_sk#88] + +(40) CometBroadcastHashJoin +Left output [12]: [cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86] +Right output [1]: [sm_ship_mode_sk#88] +Arguments: [cs_ship_mode_sk#71], [sm_ship_mode_sk#88], Inner, BuildRight + +(41) CometProject +Input [13]: [cs_ship_mode_sk#71, cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86, sm_ship_mode_sk#88] +Arguments: [cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86], [cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86] + +(42) CometHashAggregate +Input [11]: [cs_quantity#73, cs_sales_price#74, cs_net_paid_inc_tax#75, w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, d_moy#86] +Keys [7]: [w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85] +Functions [24]: [partial_sum(CASE WHEN (d_moy#86 = 1) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 2) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 3) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 4) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 5) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 6) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 7) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 8) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 9) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 10) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 11) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 12) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 1) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 2) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 3) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 4) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 5) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 6) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 7) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 8) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 9) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 10) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 11) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#86 = 12) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END)] + +(43) CometExchange +Input [55]: [w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134, sum#135, isEmpty#136] +Arguments: hashpartitioning(w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(44) CometHashAggregate +Input [55]: [w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134, sum#135, isEmpty#136] +Keys [7]: [w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, d_year#85] +Functions [24]: [sum(CASE WHEN (d_moy#86 = 1) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 2) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 3) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 4) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 5) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 6) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 7) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 8) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 9) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 10) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 11) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 12) THEN (cs_sales_price#74 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 1) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 2) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 3) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 4) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 5) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 6) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 7) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 8) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 9) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 10) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 11) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#86 = 12) THEN (cs_net_paid_inc_tax#75 * cast(cs_quantity#73 as decimal(10,0))) ELSE 0.00 END)] + +(45) CometUnion +Child 0 Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, jan_sales#139, feb_sales#140, mar_sales#141, apr_sales#142, may_sales#143, jun_sales#144, jul_sales#145, aug_sales#146, sep_sales#147, oct_sales#148, nov_sales#149, dec_sales#150, jan_net#151, feb_net#152, mar_net#153, apr_net#154, may_net#155, jun_net#156, jul_net#157, aug_net#158, sep_net#159, oct_net#160, nov_net#161, dec_net#162] +Child 1 Input [32]: [w_warehouse_name#78, w_warehouse_sq_ft#79, w_city#80, w_county#81, w_state#82, w_country#83, ship_carriers#163, year#164, jan_sales#165, feb_sales#166, mar_sales#167, apr_sales#168, may_sales#169, jun_sales#170, jul_sales#171, aug_sales#172, sep_sales#173, oct_sales#174, nov_sales#175, dec_sales#176, jan_net#177, feb_net#178, mar_net#179, apr_net#180, may_net#181, jun_net#182, jul_net#183, aug_net#184, sep_net#185, oct_net#186, nov_net#187, dec_net#188] + +(46) CometHashAggregate +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, jan_sales#139, feb_sales#140, mar_sales#141, apr_sales#142, may_sales#143, jun_sales#144, jul_sales#145, aug_sales#146, sep_sales#147, oct_sales#148, nov_sales#149, dec_sales#150, jan_net#151, feb_net#152, mar_net#153, apr_net#154, may_net#155, jun_net#156, jul_net#157, aug_net#158, sep_net#159, oct_net#160, nov_net#161, dec_net#162] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138] +Functions [36]: [partial_sum(jan_sales#139), partial_sum(feb_sales#140), partial_sum(mar_sales#141), partial_sum(apr_sales#142), partial_sum(may_sales#143), partial_sum(jun_sales#144), partial_sum(jul_sales#145), partial_sum(aug_sales#146), partial_sum(sep_sales#147), partial_sum(oct_sales#148), partial_sum(nov_sales#149), partial_sum(dec_sales#150), partial_sum((jan_sales#139 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((feb_sales#140 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((mar_sales#141 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((apr_sales#142 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((may_sales#143 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jun_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jul_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((aug_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((sep_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((oct_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((nov_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((dec_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum(jan_net#151), partial_sum(feb_net#152), partial_sum(mar_net#153), partial_sum(apr_net#154), partial_sum(may_net#155), partial_sum(jun_net#156), partial_sum(jul_net#157), partial_sum(aug_net#158), partial_sum(sep_net#159), partial_sum(oct_net#160), partial_sum(nov_net#161), partial_sum(dec_net#162)] + +(47) CometExchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228, sum#229, isEmpty#230, sum#231, isEmpty#232, sum#233, isEmpty#234, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(48) CometHashAggregate +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228, sum#229, isEmpty#230, sum#231, isEmpty#232, sum#233, isEmpty#234, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138] +Functions [36]: [sum(jan_sales#139), sum(feb_sales#140), sum(mar_sales#141), sum(apr_sales#142), sum(may_sales#143), sum(jun_sales#144), sum(jul_sales#145), sum(aug_sales#146), sum(sep_sales#147), sum(oct_sales#148), sum(nov_sales#149), sum(dec_sales#150), sum((jan_sales#139 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((feb_sales#140 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((mar_sales#141 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((apr_sales#142 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((may_sales#143 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jun_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jul_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((aug_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((sep_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((oct_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((nov_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((dec_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum(jan_net#151), sum(feb_net#152), sum(mar_net#153), sum(apr_net#154), sum(may_net#155), sum(jun_net#156), sum(jul_net#157), sum(aug_net#158), sum(sep_net#159), sum(oct_net#160), sum(nov_net#161), sum(dec_net#162)] + +(49) CometTakeOrderedAndProject +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, jan_sales#261, feb_sales#262, mar_sales#263, apr_sales#264, may_sales#265, jun_sales#266, jul_sales#267, aug_sales#268, sep_sales#269, oct_sales#270, nov_sales#271, dec_sales#272, jan_sales_per_sq_foot#273, feb_sales_per_sq_foot#274, mar_sales_per_sq_foot#275, apr_sales_per_sq_foot#276, may_sales_per_sq_foot#277, jun_sales_per_sq_foot#278, jul_sales_per_sq_foot#279, aug_sales_per_sq_foot#280, sep_sales_per_sq_foot#281, oct_sales_per_sq_foot#282, nov_sales_per_sq_foot#283, dec_sales_per_sq_foot#284, jan_net#285, feb_net#286, mar_net#287, apr_net#288, may_net#289, jun_net#290, jul_net#291, aug_net#292, sep_net#293, oct_net#294, nov_net#295, dec_net#296] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#9 ASC NULLS FIRST], output=[w_warehouse_name#9,w_warehouse_sq_ft#10,w_city#11,w_county#12,w_state#13,w_country#14,ship_carriers#137,year#138,jan_sales#261,feb_sales#262,mar_sales#263,apr_sales#264,may_sales#265,jun_sales#266,jul_sales#267,aug_sales#268,sep_sales#269,oct_sales#270,nov_sales#271,dec_sales#272,jan_sales_per_sq_foot#273,feb_sales_per_sq_foot#274,mar_sales_per_sq_foot#275,apr_sales_per_sq_foot#276,may_sales_per_sq_foot#277,jun_sales_per_sq_foot#278,jul_sales_per_sq_foot#279,aug_sales_per_sq_foot#280,sep_sales_per_sq_foot#281,oct_sales_per_sq_foot#282,nov_sales_per_sq_foot#283,dec_sales_per_sq_foot#284,jan_net#285,feb_net#286,mar_net#287,apr_net#288,may_net#289,jun_net#290,jul_net#291,aug_net#292,sep_net#293,oct_net#294,nov_net#295,dec_net#296]), [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, jan_sales#261, feb_sales#262, mar_sales#263, apr_sales#264, may_sales#265, jun_sales#266, jul_sales#267, aug_sales#268, sep_sales#269, oct_sales#270, nov_sales#271, dec_sales#272, jan_sales_per_sq_foot#273, feb_sales_per_sq_foot#274, mar_sales_per_sq_foot#275, apr_sales_per_sq_foot#276, may_sales_per_sq_foot#277, jun_sales_per_sq_foot#278, jul_sales_per_sq_foot#279, aug_sales_per_sq_foot#280, sep_sales_per_sq_foot#281, oct_sales_per_sq_foot#282, nov_sales_per_sq_foot#283, dec_sales_per_sq_foot#284, jan_net#285, feb_net#286, mar_net#287, apr_net#288, may_net#289, jun_net#290, jul_net#291, aug_net#292, sep_net#293, oct_net#294, nov_net#295, dec_net#296], 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, jan_sales#261, feb_sales#262, mar_sales#263, apr_sales#264, may_sales#265, jun_sales#266, jul_sales#267, aug_sales#268, sep_sales#269, oct_sales#270, nov_sales#271, dec_sales#272, jan_sales_per_sq_foot#273, feb_sales_per_sq_foot#274, mar_sales_per_sq_foot#275, apr_sales_per_sq_foot#276, may_sales_per_sq_foot#277, jun_sales_per_sq_foot#278, jul_sales_per_sq_foot#279, aug_sales_per_sq_foot#280, sep_sales_per_sq_foot#281, oct_sales_per_sq_foot#282, nov_sales_per_sq_foot#283, dec_sales_per_sq_foot#284, jan_net#285, feb_net#286, mar_net#287, apr_net#288, may_net#289, jun_net#290, jul_net#291, aug_net#292, sep_net#293, oct_net#294, nov_net#295, dec_net#296] + +(50) ColumnarToRow [codegen id : 1] +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#137, year#138, jan_sales#261, feb_sales#262, mar_sales#263, apr_sales#264, may_sales#265, jun_sales#266, jul_sales#267, aug_sales#268, sep_sales#269, oct_sales#270, nov_sales#271, dec_sales#272, jan_sales_per_sq_foot#273, feb_sales_per_sq_foot#274, mar_sales_per_sq_foot#275, apr_sales_per_sq_foot#276, may_sales_per_sq_foot#277, jun_sales_per_sq_foot#278, jul_sales_per_sq_foot#279, aug_sales_per_sq_foot#280, sep_sales_per_sq_foot#281, oct_sales_per_sq_foot#282, nov_sales_per_sq_foot#283, dec_sales_per_sq_foot#284, jan_net#285, feb_net#286, mar_net#287, apr_net#288, may_net#289, jun_net#290, jul_net#291, aug_net#292, sep_net#293, oct_net#294, nov_net#295, dec_net#296] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..72cdf51a1f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q66.native_iceberg_compat/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum((jan_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((feb_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((mar_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((apr_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((may_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jun_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jul_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((aug_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((sep_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((oct_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((nov_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((dec_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net)] + CometExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + CometUnion [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum(CASE WHEN (d_moy = 1) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END)] + CometExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,d_moy,ws_ext_sales_price,ws_quantity,ws_net_paid] + CometProject [ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,sm_ship_mode_sk] + CometProject [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,t_time_sk] + CometProject [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_date_sk,d_year,d_moy] + CometProject [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometFilter [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 + CometFilter [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [t_time_sk] #5 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_time] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_time] + CometBroadcastExchange [sm_ship_mode_sk] #6 + CometProject [sm_ship_mode_sk] + CometFilter [sm_ship_mode_sk,sm_carrier] + CometScan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_carrier] + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum(CASE WHEN (d_moy = 1) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END)] + CometExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 + CometHashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,d_moy,cs_sales_price,cs_quantity,cs_net_paid_inc_tax] + CometProject [cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,sm_ship_mode_sk] + CometProject [cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,t_time_sk] + CometProject [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_date_sk,d_year,d_moy] + CometProject [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastHashJoin [cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk,w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometFilter [cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk] + ReusedExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 + ReusedExchange [d_date_sk,d_year,d_moy] #4 + ReusedExchange [t_time_sk] #5 + ReusedExchange [sm_ship_mode_sk] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/explain.txt new file mode 100644 index 0000000000..99de5c768f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/explain.txt @@ -0,0 +1,162 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- WindowGroupLimit (28) + +- * Sort (27) + +- Exchange (26) + +- WindowGroupLimit (25) + +- * ColumnarToRow (24) + +- CometSort (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometExpand (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`item` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(4) CometFilter +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10], [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(6) CometBroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Right output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10], [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(10) CometFilter +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(11) CometBroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Right output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(13) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] +Arguments: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12], [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] + +(14) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(15) CometFilter +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(16) CometBroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(17) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Right output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_item_sk#1], [i_item_sk#13], Inner, BuildRight + +(18) CometProject +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] + +(19) CometExpand +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] + +(20) CometHashAggregate +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(21) CometExchange +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#27, isEmpty#28] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(23) CometSort +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29], [i_category#18 ASC NULLS FIRST, sumsales#29 DESC NULLS LAST] + +(24) ColumnarToRow [codegen id : 1] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] + +(25) WindowGroupLimit +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18], [sumsales#29 DESC NULLS LAST], rank(sumsales#29), 100, Partial + +(26) Exchange +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(27) Sort [codegen id : 2] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#29 DESC NULLS LAST], false, 0 + +(28) WindowGroupLimit +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18], [sumsales#29 DESC NULLS LAST], rank(sumsales#29), 100, Final + +(29) Window +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [rank(sumsales#29) windowspecdefinition(i_category#18, sumsales#29 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#30], [i_category#18], [sumsales#29 DESC NULLS LAST] + +(30) Filter [codegen id : 3] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29, rk#30] +Condition : (rk#30 <= 100) + +(31) TakeOrderedAndProject +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29, rk#30] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#29 ASC NULLS FIRST, rk#30 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29, rk#30] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/simplified.txt new file mode 100644 index 0000000000..138c373993 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (3) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (2) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,spark_grouping_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty,ss_sales_price,ss_quantity] + CometExpand [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] + CometProject [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy,d_qoy] + CometFilter [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy,d_qoy] #3 + CometProject [d_date_sk,d_year,d_moy,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #5 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4dbd53b80b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/explain.txt @@ -0,0 +1,175 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- WindowGroupLimit (28) + +- * Sort (27) + +- Exchange (26) + +- WindowGroupLimit (25) + +- * ColumnarToRow (24) + +- CometSort (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometExpand (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.store (9) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometScan parquet spark_catalog.default.item (14) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10], [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(6) CometBroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Right output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10], [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_id#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(11) CometBroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Right output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(13) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] +Arguments: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12], [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] + +(14) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) CometFilter +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(16) CometBroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(17) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Right output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_item_sk#1], [i_item_sk#13], Inner, BuildRight + +(18) CometProject +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] + +(19) CometExpand +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] + +(20) CometHashAggregate +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(21) CometExchange +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#27, isEmpty#28] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(23) CometSort +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29], [i_category#18 ASC NULLS FIRST, sumsales#29 DESC NULLS LAST] + +(24) ColumnarToRow [codegen id : 1] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] + +(25) WindowGroupLimit +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18], [sumsales#29 DESC NULLS LAST], rank(sumsales#29), 100, Partial + +(26) Exchange +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(27) Sort [codegen id : 2] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#29 DESC NULLS LAST], false, 0 + +(28) WindowGroupLimit +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [i_category#18], [sumsales#29 DESC NULLS LAST], rank(sumsales#29), 100, Final + +(29) Window +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29] +Arguments: [rank(sumsales#29) windowspecdefinition(i_category#18, sumsales#29 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#30], [i_category#18], [sumsales#29 DESC NULLS LAST] + +(30) Filter [codegen id : 3] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29, rk#30] +Condition : (rk#30 <= 100) + +(31) TakeOrderedAndProject +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29, rk#30] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#29 ASC NULLS FIRST, rk#30 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#29, rk#30] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..378ab01aaf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q67.native_iceberg_compat/simplified.txt @@ -0,0 +1,37 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (3) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (2) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,spark_grouping_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty,ss_sales_price,ss_quantity] + CometExpand [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] + CometProject [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy,d_qoy] + CometFilter [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy,d_qoy] #3 + CometProject [d_date_sk,d_year,d_moy,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #5 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/explain.txt new file mode 100644 index 0000000000..662ae98432 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/explain.txt @@ -0,0 +1,199 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometProject (36) + +- CometBroadcastHashJoin (35) + :- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (20) + : : : +- CometBroadcastHashJoin (19) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : : : +- CometBroadcastExchange (18) + : : : +- CometProject (17) + : : : +- CometFilter (16) + : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + : : +- CometBroadcastExchange (23) + : : +- CometFilter (22) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (21) + : +- CometBroadcastExchange (31) + : +- CometFilter (30) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (29) + +- ReusedExchange (34) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] + +(2) CometFilter +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Arguments: [d_date_sk#10, d_year#11, d_dom#12] + +(4) CometFilter +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(5) CometProject +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(7) CometBroadcastHashJoin +Left output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + +(8) CometProject +Input [10]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9, d_date_sk#10] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#13, s_city#14] +Arguments: [s_store_sk#13, s_city#14] + +(10) CometFilter +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Fairview) AND isnotnull(s_store_sk#13)) + +(11) CometProject +Input [2]: [s_store_sk#13, s_city#14] +Arguments: [s_store_sk#13], [s_store_sk#13] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: [s_store_sk#13] + +(13) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [1]: [s_store_sk#13] +Arguments: [ss_store_sk#4], [s_store_sk#13], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#13] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(16) CometFilter +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 4) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) + +(17) CometProject +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15], [hd_demo_sk#15] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: [hd_demo_sk#15] + +(19) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [1]: [hd_demo_sk#15] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#15], Inner, BuildRight + +(20) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(21) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ca_address_sk#18, ca_city#19] + +(22) CometFilter +Input [2]: [ca_address_sk#18, ca_city#19] +Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) + +(23) CometBroadcastExchange +Input [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ca_address_sk#18, ca_city#19] + +(24) CometBroadcastHashJoin +Left output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ss_addr_sk#3], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#18, ca_city#19] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] + +(26) CometHashAggregate +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] + +(27) CometExchange +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#20, sum#21, sum#22] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#20, sum#21, sum#22] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] + +(29) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] + +(30) CometFilter +Input [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Condition : (isnotnull(c_customer_sk#23) AND isnotnull(c_current_addr_sk#24)) + +(31) CometBroadcastExchange +Input [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] + +(32) CometBroadcastHashJoin +Left output [6]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#27, extended_price#28, list_price#29, extended_tax#30] +Right output [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [ss_customer_sk#1], [c_customer_sk#23], Inner, BuildRight + +(33) CometProject +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26], [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26] + +(34) ReusedExchange [Reuses operator id: 23] +Output [2]: [ca_address_sk#31, ca_city#32] + +(35) CometBroadcastHashJoin +Left output [8]: [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Right output [2]: [ca_address_sk#31, ca_city#32] +Arguments: [c_current_addr_sk#24], [ca_address_sk#31], Inner, NOT (ca_city#32 = bought_city#27), BuildRight + +(36) CometProject +Input [10]: [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26, ca_address_sk#31, ca_city#32] +Arguments: [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29], [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] + +(37) CometTakeOrderedAndProject +Input [8]: [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#26 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#26,c_first_name#25,ca_city#32,bought_city#27,ss_ticket_number#5,extended_price#28,extended_tax#30,list_price#29]), [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29], 100, [c_last_name#26 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] + +(38) ColumnarToRow [codegen id : 1] +Input [8]: [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9fbeb0195c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + CometProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + CometBroadcastHashJoin [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk,ca_city] + CometProject [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,bought_city,extended_price,list_price,extended_tax,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometHashAggregate [ss_ticket_number,ss_customer_sk,bought_city,extended_price,list_price,extended_tax,ss_addr_sk,ca_city,sum,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax))] + CometExchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + CometHashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] + CometBroadcastHashJoin [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_address_sk,ca_city] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #3 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_city] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [ca_address_sk,ca_city] #5 + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] #6 + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b05b902ba6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometProject (36) + +- CometBroadcastHashJoin (35) + :- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (20) + : : : +- CometBroadcastHashJoin (19) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometScan parquet spark_catalog.default.store (9) + : : : +- CometBroadcastExchange (18) + : : : +- CometProject (17) + : : : +- CometFilter (16) + : : : +- CometScan parquet spark_catalog.default.household_demographics (15) + : : +- CometBroadcastExchange (23) + : : +- CometFilter (22) + : : +- CometScan parquet spark_catalog.default.customer_address (21) + : +- CometBroadcastExchange (31) + : +- CometFilter (30) + : +- CometScan parquet spark_catalog.default.customer (29) + +- ReusedExchange (34) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#9)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(5) CometProject +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(7) CometBroadcastHashJoin +Left output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + +(8) CometProject +Input [10]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9, d_date_sk#10] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Fairview) AND isnotnull(s_store_sk#13)) + +(11) CometProject +Input [2]: [s_store_sk#13, s_city#14] +Arguments: [s_store_sk#13], [s_store_sk#13] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: [s_store_sk#13] + +(13) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [1]: [s_store_sk#13] +Arguments: [ss_store_sk#4], [s_store_sk#13], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#13] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(15) CometScan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 4) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) + +(17) CometProject +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15], [hd_demo_sk#15] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: [hd_demo_sk#15] + +(19) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [1]: [hd_demo_sk#15] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#15], Inner, BuildRight + +(20) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(21) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#18, ca_city#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(22) CometFilter +Input [2]: [ca_address_sk#18, ca_city#19] +Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) + +(23) CometBroadcastExchange +Input [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ca_address_sk#18, ca_city#19] + +(24) CometBroadcastHashJoin +Left output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ss_addr_sk#3], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#18, ca_city#19] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] + +(26) CometHashAggregate +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] + +(27) CometExchange +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#20, sum#21, sum#22] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#20, sum#21, sum#22] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] + +(29) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(30) CometFilter +Input [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Condition : (isnotnull(c_customer_sk#23) AND isnotnull(c_current_addr_sk#24)) + +(31) CometBroadcastExchange +Input [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] + +(32) CometBroadcastHashJoin +Left output [6]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#27, extended_price#28, list_price#29, extended_tax#30] +Right output [4]: [c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [ss_customer_sk#1], [c_customer_sk#23], Inner, BuildRight + +(33) CometProject +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_customer_sk#23, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Arguments: [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26], [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26] + +(34) ReusedExchange [Reuses operator id: 23] +Output [2]: [ca_address_sk#31, ca_city#32] + +(35) CometBroadcastHashJoin +Left output [8]: [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26] +Right output [2]: [ca_address_sk#31, ca_city#32] +Arguments: [c_current_addr_sk#24], [ca_address_sk#31], Inner, NOT (ca_city#32 = bought_city#27), BuildRight + +(36) CometProject +Input [10]: [ss_ticket_number#5, bought_city#27, extended_price#28, list_price#29, extended_tax#30, c_current_addr_sk#24, c_first_name#25, c_last_name#26, ca_address_sk#31, ca_city#32] +Arguments: [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29], [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] + +(37) CometTakeOrderedAndProject +Input [8]: [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#26 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#26,c_first_name#25,ca_city#32,bought_city#27,ss_ticket_number#5,extended_price#28,extended_tax#30,list_price#29]), [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29], 100, [c_last_name#26 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] + +(38) ColumnarToRow [codegen id : 1] +Input [8]: [c_last_name#26, c_first_name#25, ca_city#32, bought_city#27, ss_ticket_number#5, extended_price#28, extended_tax#30, list_price#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..5d636231fa --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q68.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + CometProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + CometBroadcastHashJoin [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk,ca_city] + CometProject [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,bought_city,extended_price,list_price,extended_tax,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometHashAggregate [ss_ticket_number,ss_customer_sk,bought_city,extended_price,list_price,extended_tax,ss_addr_sk,ca_city,sum,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax))] + CometExchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + CometHashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] + CometBroadcastHashJoin [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_address_sk,ca_city] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #3 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_city] + CometScan parquet spark_catalog.default.store [s_store_sk,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [ca_address_sk,ca_city] #5 + CometFilter [ca_address_sk,ca_city] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] #6 + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/explain.txt new file mode 100644 index 0000000000..709357a617 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin LeftAnti BuildRight (21) + : : :- * BroadcastHashJoin LeftAnti BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (27) + : +- * ColumnarToRow (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (23) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#4, ss_sold_date_sk#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6, d_year#7, d_moy#8] + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Arguments: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [ws_bill_customer_sk#9], [ws_bill_customer_sk#9] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#9] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#9] +Join type: LeftAnti +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#12] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#12] +Join type: LeftAnti +Join condition: None + +(22) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(23) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#13, ca_state#14] +Arguments: [ca_address_sk#13, ca_state#14] + +(24) CometFilter +Input [2]: [ca_address_sk#13, ca_state#14] +Condition : (ca_state#14 IN (KY,GA,NM) AND isnotnull(ca_address_sk#13)) + +(25) CometProject +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: [ca_address_sk#13], [ca_address_sk#13] + +(26) ColumnarToRow [codegen id : 3] +Input [1]: [ca_address_sk#13] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#13] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#13] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Arguments: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] + +(31) CometFilter +Input [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Condition : isnotnull(cd_demo_sk#15) + +(32) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] + +(33) BroadcastExchange +Input [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#15] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 5] +Output [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] + +(36) HashAggregate [codegen id : 5] +Input [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Keys [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#21] +Results [6]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, count#22] + +(37) Exchange +Input [6]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, count#22] +Arguments: hashpartitioning(cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 6] +Input [6]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, count#22] +Keys [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#23] +Results [8]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, count(1)#23 AS cnt1#24, cd_purchase_estimate#19, count(1)#23 AS cnt2#25, cd_credit_rating#20, count(1)#23 AS cnt3#26] + +(39) TakeOrderedAndProject +Input [8]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cnt1#24, cd_purchase_estimate#19, cnt2#25, cd_credit_rating#20, cnt3#26] +Arguments: 100, [cd_gender#16 ASC NULLS FIRST, cd_marital_status#17 ASC NULLS FIRST, cd_education_status#18 ASC NULLS FIRST, cd_purchase_estimate#19 ASC NULLS FIRST, cd_credit_rating#20 ASC NULLS FIRST], [cd_gender#16, cd_marital_status#17, cd_education_status#18, cnt1#24, cd_purchase_estimate#19, cnt2#25, cd_credit_rating#20, cnt3#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f8b77163ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_datafusion/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cnt1,cnt2,cnt3] + WholeStageCodegen (6) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,count] [count(1),cnt1,cnt2,cnt3,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] #1 + WholeStageCodegen (5) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..994669f4d6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/explain.txt @@ -0,0 +1,255 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (34) + : +- * BroadcastHashJoin Inner BuildRight (33) + : :- * Project (27) + : : +- * BroadcastHashJoin LeftAnti BuildRight (26) + : : :- * BroadcastHashJoin LeftAnti BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometScan parquet spark_catalog.default.store_sales (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometScan parquet spark_catalog.default.web_sales (13) + : : : +- ReusedExchange (14) + : : +- BroadcastExchange (25) + : : +- * ColumnarToRow (24) + : : +- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometScan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (21) + : +- BroadcastExchange (32) + : +- * ColumnarToRow (31) + : +- CometProject (30) + : +- CometFilter (29) + : +- CometScan parquet spark_catalog.default.customer_address (28) + +- BroadcastExchange (38) + +- * ColumnarToRow (37) + +- CometFilter (36) + +- CometScan parquet spark_catalog.default.customer_demographics (35) + + +(1) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(13) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [ws_bill_customer_sk#9], [ws_bill_customer_sk#9] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#9] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#9] +Join type: LeftAnti +Join condition: None + +(20) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#13)] +ReadSchema: struct + +(21) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#14] + +(22) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] +Right output [1]: [d_date_sk#14] +Arguments: [cs_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(23) CometProject +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] +Arguments: [cs_ship_customer_sk#12], [cs_ship_customer_sk#12] + +(24) ColumnarToRow [codegen id : 2] +Input [1]: [cs_ship_customer_sk#12] + +(25) BroadcastExchange +Input [1]: [cs_ship_customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#12] +Join type: LeftAnti +Join condition: None + +(27) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(28) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#15, ca_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [GA,KY,NM]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(29) CometFilter +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : (ca_state#16 IN (KY,GA,NM) AND isnotnull(ca_address_sk#15)) + +(30) CometProject +Input [2]: [ca_address_sk#15, ca_state#16] +Arguments: [ca_address_sk#15], [ca_address_sk#15] + +(31) ColumnarToRow [codegen id : 3] +Input [1]: [ca_address_sk#15] + +(32) BroadcastExchange +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 5] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#15] + +(35) CometScan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(36) CometFilter +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Condition : isnotnull(cd_demo_sk#17) + +(37) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] + +(38) BroadcastExchange +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(39) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 5] +Output [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] + +(41) HashAggregate [codegen id : 5] +Input [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Keys [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] + +(42) Exchange +Input [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] +Arguments: hashpartitioning(cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(43) HashAggregate [codegen id : 6] +Input [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] +Keys [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, count(1)#25 AS cnt1#26, cd_purchase_estimate#21, count(1)#25 AS cnt2#27, cd_credit_rating#22, count(1)#25 AS cnt3#28] + +(44) TakeOrderedAndProject +Input [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#26, cd_purchase_estimate#21, cnt2#27, cd_credit_rating#22, cnt3#28] +Arguments: 100, [cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_education_status#20 ASC NULLS FIRST, cd_purchase_estimate#21 ASC NULLS FIRST, cd_credit_rating#22 ASC NULLS FIRST], [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#26, cd_purchase_estimate#21, cnt2#27, cd_credit_rating#22, cnt3#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..05daff4e3c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q69.native_iceberg_compat/simplified.txt @@ -0,0 +1,60 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cnt1,cnt2,cnt3] + WholeStageCodegen (6) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,count] [count(1),cnt1,cnt2,cnt3,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] #1 + WholeStageCodegen (5) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometProject [cs_ship_customer_sk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/explain.txt new file mode 100644 index 0000000000..135833f1ca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/explain.txt @@ -0,0 +1,158 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometHashAggregate (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometProject (25) + +- CometBroadcastHashJoin (24) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + +- CometBroadcastExchange (23) + +- CometProject (22) + +- CometFilter (21) + +- CometNativeScan: `spark_catalog`.`default`.`promotion` (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(16) CometFilter +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(17) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] +Arguments: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16], [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(21) CometFilter +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(22) CometProject +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17], [p_promo_sk#17] + +(23) CometBroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: [p_promo_sk#17] + +(24) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Right output [1]: [p_promo_sk#17] +Arguments: [ss_promo_sk#3], [p_promo_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] +Arguments: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(26) CometHashAggregate +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] + +(27) CometExchange +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] + +(29) CometTakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#16 ASC NULLS FIRST], output=[i_item_id#16,agg1#28,agg2#29,agg3#30,agg4#31]), [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31], 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a0d42bf3ba --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price))] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + CometProject [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + CometBroadcastHashJoin [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,p_promo_sk] + CometProject [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #4 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [p_promo_sk] #5 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_email,p_channel_event] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7f59f1254c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/explain.txt @@ -0,0 +1,174 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometHashAggregate (28) + +- CometExchange (27) + +- CometHashAggregate (26) + +- CometProject (25) + +- CometBroadcastHashJoin (24) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.customer_demographics (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.date_dim (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.item (15) + +- CometBroadcastExchange (23) + +- CometProject (22) + +- CometFilter (21) + +- CometScan parquet spark_catalog.default.promotion (20) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometScan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(17) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] +Arguments: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16], [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(20) CometScan parquet spark_catalog.default.promotion +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(21) CometFilter +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(22) CometProject +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17], [p_promo_sk#17] + +(23) CometBroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: [p_promo_sk#17] + +(24) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Right output [1]: [p_promo_sk#17] +Arguments: [ss_promo_sk#3], [p_promo_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] +Arguments: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(26) CometHashAggregate +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] + +(27) CometExchange +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(28) CometHashAggregate +Input [9]: [i_item_id#16, sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] + +(29) CometTakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#16 ASC NULLS FIRST], output=[i_item_id#16,agg1#28,agg2#29,agg3#30,agg4#31]), [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31], 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [i_item_id#16, agg1#28, agg2#29, agg3#30, agg4#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..904a9e0eac --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q7.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price))] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + CometProject [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + CometBroadcastHashJoin [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,p_promo_sk] + CometProject [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #4 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometBroadcastExchange [p_promo_sk] #5 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_email,p_channel_event] + CometScan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/explain.txt new file mode 100644 index 0000000000..a2e5e6afea --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/explain.txt @@ -0,0 +1,239 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Expand (37) + +- * Project (36) + +- * BroadcastHashJoin Inner BuildRight (35) + :- * ColumnarToRow (9) + : +- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + +- BroadcastExchange (34) + +- * BroadcastHashJoin LeftSemi BuildRight (33) + :- * ColumnarToRow (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (10) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- Window (29) + +- WindowGroupLimit (28) + +- * ColumnarToRow (27) + +- CometSort (26) + +- CometHashAggregate (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometProject (22) + +- CometBroadcastHashJoin (21) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (13) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + +- ReusedExchange (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) ColumnarToRow [codegen id : 4] +Input [2]: [ss_store_sk#1, ss_net_profit#2] + +(10) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: [s_store_sk#6, s_county#7, s_state#8] + +(11) CometFilter +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Arguments: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(14) CometFilter +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(16) CometFilter +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(18) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Right output [2]: [s_store_sk#12, s_state#13] +Arguments: [ss_store_sk#9], [s_store_sk#12], Inner, BuildRight + +(19) CometProject +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] +Arguments: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13], [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Right output [1]: [d_date_sk#14] +Arguments: [ss_sold_date_sk#11], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] +Arguments: [ss_net_profit#10, s_state#13], [ss_net_profit#10, s_state#13] + +(23) CometHashAggregate +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] + +(24) CometExchange +Input [2]: [s_state#13, sum#15] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [2]: [s_state#13, sum#15] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] + +(26) CometSort +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13, _w0#16, s_state#13], [s_state#13 ASC NULLS FIRST, _w0#16 DESC NULLS LAST] + +(27) ColumnarToRow [codegen id : 1] +Input [3]: [s_state#13, _w0#16, s_state#13] + +(28) WindowGroupLimit +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13], [_w0#16 DESC NULLS LAST], rank(_w0#16), 5, Final + +(29) Window +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [rank(_w0#16) windowspecdefinition(s_state#13, _w0#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#17], [s_state#13], [_w0#16 DESC NULLS LAST] + +(30) Filter [codegen id : 2] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] +Condition : (ranking#17 <= 5) + +(31) Project [codegen id : 2] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] + +(32) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(33) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(34) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 4] +Output [3]: [ss_net_profit#2, s_state#8, s_county#7] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(37) Expand [codegen id : 4] +Input [3]: [ss_net_profit#2, s_state#8, s_county#7] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#18, s_county#19, spark_grouping_id#20] + +(38) HashAggregate [codegen id : 4] +Input [4]: [ss_net_profit#2, s_state#18, s_county#19, spark_grouping_id#20] +Keys [3]: [s_state#18, s_county#19, spark_grouping_id#20] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#21] +Results [4]: [s_state#18, s_county#19, spark_grouping_id#20, sum#22] + +(39) Exchange +Input [4]: [s_state#18, s_county#19, spark_grouping_id#20, sum#22] +Arguments: hashpartitioning(s_state#18, s_county#19, spark_grouping_id#20, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 5] +Input [4]: [s_state#18, s_county#19, spark_grouping_id#20, sum#22] +Keys [3]: [s_state#18, s_county#19, spark_grouping_id#20] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#23] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#23,17,2) AS total_sum#24, s_state#18, s_county#19, (cast((shiftright(spark_grouping_id#20, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#20, 0) & 1) as tinyint)) AS lochierarchy#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#23,17,2) AS _w0#26, (cast((shiftright(spark_grouping_id#20, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#20, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#20, 0) & 1) as tinyint) = 0) THEN s_state#18 END AS _w2#28] + +(41) Exchange +Input [7]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28] +Arguments: hashpartitioning(_w1#27, _w2#28, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(42) Sort [codegen id : 6] +Input [7]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28] +Arguments: [_w1#27 ASC NULLS FIRST, _w2#28 ASC NULLS FIRST, _w0#26 DESC NULLS LAST], false, 0 + +(43) Window +Input [7]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28] +Arguments: [rank(_w0#26) windowspecdefinition(_w1#27, _w2#28, _w0#26 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#29], [_w1#27, _w2#28], [_w0#26 DESC NULLS LAST] + +(44) Project [codegen id : 7] +Output [5]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, rank_within_parent#29] +Input [8]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28, rank_within_parent#29] + +(45) TakeOrderedAndProject +Input [5]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, rank_within_parent#29] +Arguments: 100, [lochierarchy#25 DESC NULLS LAST, CASE WHEN (lochierarchy#25 = 0) THEN s_state#18 END ASC NULLS FIRST, rank_within_parent#29 ASC NULLS FIRST], [total_sum#24, s_state#18, s_county#19, lochierarchy#25, rank_within_parent#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/simplified.txt new file mode 100644 index 0000000000..963f843456 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_datafusion/simplified.txt @@ -0,0 +1,61 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (7) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (6) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (5) + HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + BroadcastHashJoin [s_state,s_state] + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_county,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WindowGroupLimit [s_state,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [s_state,_w0] + CometHashAggregate [s_state,_w0,sum,sum(UnscaledValue(ss_net_profit))] + CometExchange [s_state] #6 + CometHashAggregate [s_state,sum,ss_net_profit] + CometProject [ss_net_profit,s_state] + CometBroadcastHashJoin [ss_net_profit,ss_sold_date_sk,s_state,d_date_sk] + CometProject [ss_net_profit,ss_sold_date_sk,s_state] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,s_store_sk,s_state] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_state] #7 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..cc3bf9c12e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Expand (37) + +- * Project (36) + +- * BroadcastHashJoin Inner BuildRight (35) + :- * ColumnarToRow (9) + : +- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.store_sales (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.date_dim (3) + +- BroadcastExchange (34) + +- * BroadcastHashJoin LeftSemi BuildRight (33) + :- * ColumnarToRow (12) + : +- CometFilter (11) + : +- CometScan parquet spark_catalog.default.store (10) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- Window (29) + +- WindowGroupLimit (28) + +- * ColumnarToRow (27) + +- CometSort (26) + +- CometHashAggregate (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometProject (22) + +- CometBroadcastHashJoin (21) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.store_sales (13) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.store (15) + +- ReusedExchange (20) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) ColumnarToRow [codegen id : 4] +Input [2]: [ss_store_sk#1, ss_net_profit#2] + +(10) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) CometFilter +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(15) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_state#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(18) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Right output [2]: [s_store_sk#12, s_state#13] +Arguments: [ss_store_sk#9], [s_store_sk#12], Inner, BuildRight + +(19) CometProject +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] +Arguments: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13], [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Right output [1]: [d_date_sk#14] +Arguments: [ss_sold_date_sk#11], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] +Arguments: [ss_net_profit#10, s_state#13], [ss_net_profit#10, s_state#13] + +(23) CometHashAggregate +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] + +(24) CometExchange +Input [2]: [s_state#13, sum#15] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [2]: [s_state#13, sum#15] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] + +(26) CometSort +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13, _w0#16, s_state#13], [s_state#13 ASC NULLS FIRST, _w0#16 DESC NULLS LAST] + +(27) ColumnarToRow [codegen id : 1] +Input [3]: [s_state#13, _w0#16, s_state#13] + +(28) WindowGroupLimit +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13], [_w0#16 DESC NULLS LAST], rank(_w0#16), 5, Final + +(29) Window +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [rank(_w0#16) windowspecdefinition(s_state#13, _w0#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#17], [s_state#13], [_w0#16 DESC NULLS LAST] + +(30) Filter [codegen id : 2] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] +Condition : (ranking#17 <= 5) + +(31) Project [codegen id : 2] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] + +(32) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(33) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(34) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 4] +Output [3]: [ss_net_profit#2, s_state#8, s_county#7] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(37) Expand [codegen id : 4] +Input [3]: [ss_net_profit#2, s_state#8, s_county#7] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#18, s_county#19, spark_grouping_id#20] + +(38) HashAggregate [codegen id : 4] +Input [4]: [ss_net_profit#2, s_state#18, s_county#19, spark_grouping_id#20] +Keys [3]: [s_state#18, s_county#19, spark_grouping_id#20] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#21] +Results [4]: [s_state#18, s_county#19, spark_grouping_id#20, sum#22] + +(39) Exchange +Input [4]: [s_state#18, s_county#19, spark_grouping_id#20, sum#22] +Arguments: hashpartitioning(s_state#18, s_county#19, spark_grouping_id#20, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 5] +Input [4]: [s_state#18, s_county#19, spark_grouping_id#20, sum#22] +Keys [3]: [s_state#18, s_county#19, spark_grouping_id#20] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#23] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#23,17,2) AS total_sum#24, s_state#18, s_county#19, (cast((shiftright(spark_grouping_id#20, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#20, 0) & 1) as tinyint)) AS lochierarchy#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#23,17,2) AS _w0#26, (cast((shiftright(spark_grouping_id#20, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#20, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#20, 0) & 1) as tinyint) = 0) THEN s_state#18 END AS _w2#28] + +(41) Exchange +Input [7]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28] +Arguments: hashpartitioning(_w1#27, _w2#28, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(42) Sort [codegen id : 6] +Input [7]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28] +Arguments: [_w1#27 ASC NULLS FIRST, _w2#28 ASC NULLS FIRST, _w0#26 DESC NULLS LAST], false, 0 + +(43) Window +Input [7]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28] +Arguments: [rank(_w0#26) windowspecdefinition(_w1#27, _w2#28, _w0#26 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#29], [_w1#27, _w2#28], [_w0#26 DESC NULLS LAST] + +(44) Project [codegen id : 7] +Output [5]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, rank_within_parent#29] +Input [8]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, _w0#26, _w1#27, _w2#28, rank_within_parent#29] + +(45) TakeOrderedAndProject +Input [5]: [total_sum#24, s_state#18, s_county#19, lochierarchy#25, rank_within_parent#29] +Arguments: 100, [lochierarchy#25 DESC NULLS LAST, CASE WHEN (lochierarchy#25 = 0) THEN s_state#18 END ASC NULLS FIRST, rank_within_parent#29 ASC NULLS FIRST], [total_sum#24, s_state#18, s_county#19, lochierarchy#25, rank_within_parent#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7caaf68a02 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q70.native_iceberg_compat/simplified.txt @@ -0,0 +1,61 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (7) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (6) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (5) + HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + BroadcastHashJoin [s_state,s_state] + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_county,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WindowGroupLimit [s_state,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [s_state,_w0] + CometHashAggregate [s_state,_w0,sum,sum(UnscaledValue(ss_net_profit))] + CometExchange [s_state] #6 + CometHashAggregate [s_state,sum,ss_net_profit] + CometProject [ss_net_profit,s_state] + CometBroadcastHashJoin [ss_net_profit,ss_sold_date_sk,s_state,d_date_sk] + CometProject [ss_net_profit,ss_sold_date_sk,s_state] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,s_store_sk,s_state] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_state] #7 + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/explain.txt new file mode 100644 index 0000000000..6f27297066 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +* ColumnarToRow (37) ++- CometSort (36) + +- CometColumnarExchange (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometProject (31) + +- CometBroadcastHashJoin (30) + :- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometBroadcastExchange (4) + : : +- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : +- CometUnion (23) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (5) + : : +- CometBroadcastExchange (10) + : : +- CometProject (9) + : : +- CometFilter (8) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (7) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (13) + : : +- ReusedExchange (15) + : +- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (19) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (18) + : +- ReusedExchange (20) + +- CometBroadcastExchange (29) + +- CometProject (28) + +- CometFilter (27) + +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(2) CometFilter +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3], [i_item_sk#1, i_brand_id#2, i_brand#3] + +(4) CometBroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3] + +(5) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Arguments: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] + +(6) CometFilter +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) + +(7) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_year#10, d_moy#11] + +(8) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(9) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(10) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(11) CometBroadcastHashJoin +Left output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ws_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(12) CometProject +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#9] +Arguments: [ext_price#12, sold_item_sk#13, time_sk#14], [ws_ext_sales_price#7 AS ext_price#12, ws_item_sk#6 AS sold_item_sk#13, ws_sold_time_sk#5 AS time_sk#14] + +(13) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Arguments: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(14) CometFilter +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) + +(15) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#19] + +(16) CometBroadcastHashJoin +Left output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(17) CometProject +Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [ext_price#20, sold_item_sk#21, time_sk#22], [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] + +(18) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Arguments: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(19) CometFilter +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) + +(20) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#27] + +(21) CometBroadcastHashJoin +Left output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Right output [1]: [d_date_sk#27] +Arguments: [ss_sold_date_sk#26], [d_date_sk#27], Inner, BuildRight + +(22) CometProject +Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] +Arguments: [ext_price#28, sold_item_sk#29, time_sk#30], [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] + +(23) CometUnion +Child 0 Input [3]: [ext_price#12, sold_item_sk#13, time_sk#14] +Child 1 Input [3]: [ext_price#20, sold_item_sk#21, time_sk#22] +Child 2 Input [3]: [ext_price#28, sold_item_sk#29, time_sk#30] + +(24) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Right output [3]: [ext_price#12, sold_item_sk#13, time_sk#14] +Arguments: [i_item_sk#1], [sold_item_sk#13], Inner, BuildLeft + +(25) CometProject +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] +Arguments: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14], [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] + +(26) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(27) CometFilter +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) + +(28) CometProject +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33], [t_time_sk#31, t_hour#32, t_minute#33] + +(29) CometBroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33] + +(30) CometBroadcastHashJoin +Left output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] +Right output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [time_sk#14], [t_time_sk#31], Inner, BuildRight + +(31) CometProject +Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33], [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] + +(32) CometHashAggregate +Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] + +(33) CometExchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#35] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(34) CometHashAggregate +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#35] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#12))] + +(35) CometColumnarExchange +Input [5]: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38] +Arguments: rangepartitioning(ext_price#38 DESC NULLS LAST, brand_id#36 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(36) CometSort +Input [5]: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38] +Arguments: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38], [ext_price#38 DESC NULLS LAST, brand_id#36 ASC NULLS FIRST] + +(37) ColumnarToRow [codegen id : 1] +Input [5]: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/simplified.txt new file mode 100644 index 0000000000..98df4c26e6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_datafusion/simplified.txt @@ -0,0 +1,39 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [brand_id,brand,t_hour,t_minute,ext_price] + CometColumnarExchange [ext_price,brand_id] #1 + CometHashAggregate [brand_id,brand,t_hour,t_minute,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ext_price))] + CometExchange [i_brand,i_brand_id,t_hour,t_minute] #2 + CometHashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum,ext_price] + CometProject [i_brand_id,i_brand,ext_price,t_hour,t_minute] + CometBroadcastHashJoin [i_brand_id,i_brand,ext_price,time_sk,t_time_sk,t_hour,t_minute] + CometProject [i_brand_id,i_brand,ext_price,time_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_brand,ext_price,sold_item_sk,time_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometUnion [ext_price,sold_item_sk,time_sk] + CometProject [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk] + CometFilter [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometProject [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometProject [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometBroadcastExchange [t_time_sk,t_hour,t_minute] #5 + CometProject [t_time_sk,t_hour,t_minute] + CometFilter [t_time_sk,t_hour,t_minute,t_meal_time] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0b9da7af1f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/explain.txt @@ -0,0 +1,214 @@ +== Physical Plan == +* ColumnarToRow (37) ++- CometSort (36) + +- CometColumnarExchange (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometProject (31) + +- CometBroadcastHashJoin (30) + :- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometBroadcastExchange (4) + : : +- CometProject (3) + : : +- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.item (1) + : +- CometUnion (23) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometFilter (6) + : : : +- CometScan parquet spark_catalog.default.web_sales (5) + : : +- CometBroadcastExchange (10) + : : +- CometProject (9) + : : +- CometFilter (8) + : : +- CometScan parquet spark_catalog.default.date_dim (7) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (13) + : : +- ReusedExchange (15) + : +- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (19) + : : +- CometScan parquet spark_catalog.default.store_sales (18) + : +- ReusedExchange (20) + +- CometBroadcastExchange (29) + +- CometProject (28) + +- CometFilter (27) + +- CometScan parquet spark_catalog.default.time_dim (26) + + +(1) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3], [i_item_sk#1, i_brand_id#2, i_brand#3] + +(4) CometBroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3] + +(5) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#8)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] +ReadSchema: struct + +(6) CometFilter +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) + +(7) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(9) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(10) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(11) CometBroadcastHashJoin +Left output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ws_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(12) CometProject +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#9] +Arguments: [ext_price#12, sold_item_sk#13, time_sk#14], [ws_ext_sales_price#7 AS ext_price#12, ws_item_sk#6 AS sold_item_sk#13, ws_sold_time_sk#5 AS time_sk#14] + +(13) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#18)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] +ReadSchema: struct + +(14) CometFilter +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) + +(15) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#19] + +(16) CometBroadcastHashJoin +Left output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(17) CometProject +Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [ext_price#20, sold_item_sk#21, time_sk#22], [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] + +(18) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] +ReadSchema: struct + +(19) CometFilter +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) + +(20) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#27] + +(21) CometBroadcastHashJoin +Left output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Right output [1]: [d_date_sk#27] +Arguments: [ss_sold_date_sk#26], [d_date_sk#27], Inner, BuildRight + +(22) CometProject +Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] +Arguments: [ext_price#28, sold_item_sk#29, time_sk#30], [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] + +(23) CometUnion +Child 0 Input [3]: [ext_price#12, sold_item_sk#13, time_sk#14] +Child 1 Input [3]: [ext_price#20, sold_item_sk#21, time_sk#22] +Child 2 Input [3]: [ext_price#28, sold_item_sk#29, time_sk#30] + +(24) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Right output [3]: [ext_price#12, sold_item_sk#13, time_sk#14] +Arguments: [i_item_sk#1], [sold_item_sk#13], Inner, BuildLeft + +(25) CometProject +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] +Arguments: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14], [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] + +(26) CometScan parquet spark_catalog.default.time_dim +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [Or(EqualTo(t_meal_time,breakfast ),EqualTo(t_meal_time,dinner )), IsNotNull(t_time_sk)] +ReadSchema: struct + +(27) CometFilter +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) + +(28) CometProject +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33], [t_time_sk#31, t_hour#32, t_minute#33] + +(29) CometBroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33] + +(30) CometBroadcastHashJoin +Left output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] +Right output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [time_sk#14], [t_time_sk#31], Inner, BuildRight + +(31) CometProject +Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33], [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] + +(32) CometHashAggregate +Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] + +(33) CometExchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#35] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(34) CometHashAggregate +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#35] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#12))] + +(35) CometColumnarExchange +Input [5]: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38] +Arguments: rangepartitioning(ext_price#38 DESC NULLS LAST, brand_id#36 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(36) CometSort +Input [5]: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38] +Arguments: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38], [ext_price#38 DESC NULLS LAST, brand_id#36 ASC NULLS FIRST] + +(37) ColumnarToRow [codegen id : 1] +Input [5]: [brand_id#36, brand#37, t_hour#32, t_minute#33, ext_price#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d8d6d148b1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q71.native_iceberg_compat/simplified.txt @@ -0,0 +1,39 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [brand_id,brand,t_hour,t_minute,ext_price] + CometColumnarExchange [ext_price,brand_id] #1 + CometHashAggregate [brand_id,brand,t_hour,t_minute,ext_price,i_brand,i_brand_id,sum,sum(UnscaledValue(ext_price))] + CometExchange [i_brand,i_brand_id,t_hour,t_minute] #2 + CometHashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum,ext_price] + CometProject [i_brand_id,i_brand,ext_price,t_hour,t_minute] + CometBroadcastHashJoin [i_brand_id,i_brand,ext_price,time_sk,t_time_sk,t_hour,t_minute] + CometProject [i_brand_id,i_brand,ext_price,time_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_brand,ext_price,sold_item_sk,time_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometUnion [ext_price,sold_item_sk,time_sk] + CometProject [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk] + CometFilter [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometProject [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometProject [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometBroadcastExchange [t_time_sk,t_hour,t_minute] #5 + CometProject [t_time_sk,t_hour,t_minute] + CometFilter [t_time_sk,t_hour,t_minute,t_meal_time] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/explain.txt new file mode 100644 index 0000000000..2ef4c4fa34 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/explain.txt @@ -0,0 +1,323 @@ +== Physical Plan == +* ColumnarToRow (62) ++- CometTakeOrderedAndProject (61) + +- CometHashAggregate (60) + +- CometExchange (59) + +- CometHashAggregate (58) + +- CometProject (57) + +- CometSortMergeJoin (56) + :- CometSort (50) + : +- CometExchange (49) + : +- CometProject (48) + : +- CometBroadcastHashJoin (47) + : :- CometProject (43) + : : +- CometBroadcastHashJoin (42) + : : :- CometProject (38) + : : : +- CometBroadcastHashJoin (37) + : : : :- CometProject (33) + : : : : +- CometBroadcastHashJoin (32) + : : : : :- CometProject (27) + : : : : : +- CometBroadcastHashJoin (26) + : : : : : :- CometProject (21) + : : : : : : +- CometBroadcastHashJoin (20) + : : : : : : :- CometProject (15) + : : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : : :- CometProject (12) + : : : : : : : : +- CometBroadcastHashJoin (11) + : : : : : : : : :- CometProject (7) + : : : : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : : : : :- CometFilter (2) + : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : : : : : +- CometBroadcastExchange (5) + : : : : : : : : : +- CometFilter (4) + : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (3) + : : : : : : : : +- CometBroadcastExchange (10) + : : : : : : : : +- CometFilter (9) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : : : : : : : +- ReusedExchange (13) + : : : : : : +- CometBroadcastExchange (19) + : : : : : : +- CometProject (18) + : : : : : : +- CometFilter (17) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (16) + : : : : : +- CometBroadcastExchange (25) + : : : : : +- CometProject (24) + : : : : : +- CometFilter (23) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (22) + : : : : +- CometBroadcastExchange (31) + : : : : +- CometProject (30) + : : : : +- CometFilter (29) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (28) + : : : +- CometBroadcastExchange (36) + : : : +- CometFilter (35) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (34) + : : +- CometBroadcastExchange (41) + : : +- CometFilter (40) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (39) + : +- CometBroadcastExchange (46) + : +- CometFilter (45) + : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (44) + +- CometSort (55) + +- CometExchange (54) + +- CometProject (53) + +- CometFilter (52) + +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (51) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(4) CometFilter +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(5) CometBroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Right output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_item_sk#4], [inv_item_sk#9], Inner, (inv_quantity_on_hand#11 < cs_quantity#7), BuildRight + +(7) CometProject +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(9) CometFilter +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(11) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Right output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [inv_warehouse_sk#10], [w_warehouse_sk#13], Inner, BuildRight + +(12) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] + +(13) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#15, i_item_desc#16] + +(14) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Right output [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [cs_item_sk#4], [i_item_sk#15], Inner, BuildRight + +(15) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(16) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17, cd_marital_status#18] + +(17) CometFilter +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = D)) AND isnotnull(cd_demo_sk#17)) + +(18) CometProject +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17], [cd_demo_sk#17] + +(19) CometBroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: [cd_demo_sk#17] + +(20) CometBroadcastHashJoin +Left output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [cd_demo_sk#17] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#17], Inner, BuildRight + +(21) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] +Arguments: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(22) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19, hd_buy_potential#20] + +(23) CometFilter +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = >10000 )) AND isnotnull(hd_demo_sk#19)) + +(24) CometProject +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19], [hd_demo_sk#19] + +(25) CometBroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: [hd_demo_sk#19] + +(26) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [hd_demo_sk#19] +Arguments: [cs_bill_hdemo_sk#3], [hd_demo_sk#19], Inner, BuildRight + +(27) CometProject +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(28) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(29) CometFilter +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 1999)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(30) CometProject +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23], [d_date_sk#21, d_date#22, d_week_seq#23] + +(31) CometBroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23] + +(32) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_sold_date_sk#8], [d_date_sk#21], Inner, BuildRight + +(33) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(34) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(35) CometFilter +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(36) CometBroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(37) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_week_seq#23, inv_date_sk#12], [d_week_seq#26, d_date_sk#25], Inner, BuildRight + +(38) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(39) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(40) CometFilter +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(41) CometBroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(42) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#27, d_date#28] +Arguments: [cs_ship_date_sk#1], [d_date_sk#27], Inner, (d_date#28 > date_add(d_date#22, 5)), BuildRight + +(43) CometProject +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] +Arguments: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(44) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(45) CometFilter +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(46) CometBroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(47) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [1]: [p_promo_sk#29] +Arguments: [cs_promo_sk#5], [p_promo_sk#29], LeftOuter, BuildRight + +(48) CometProject +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(49) CometExchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(50) CometSort +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST] + +(51) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(52) CometFilter +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(53) CometProject +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30, cr_order_number#31] + +(54) CometExchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometSort +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST] + +(56) CometSortMergeJoin +Left output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cs_item_sk#4, cs_order_number#6], [cr_item_sk#30, cr_order_number#31], LeftOuter + +(57) CometProject +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] +Arguments: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(58) CometHashAggregate +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] + +(59) CometExchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(60) CometHashAggregate +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] + +(61) CometTakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_cnt#36 DESC NULLS LAST,i_item_desc#16 ASC NULLS FIRST,w_warehouse_name#14 ASC NULLS FIRST,d_week_seq#23 ASC NULLS FIRST], output=[i_item_desc#16,w_warehouse_name#14,d_week_seq#23,no_promo#34,promo#35,total_cnt#36]), [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36], 100, [total_cnt#36 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + +(62) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e70defef63 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt] + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt,count,count(1)] + CometExchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] + CometProject [w_warehouse_name,i_item_desc,d_week_seq] + CometSortMergeJoin [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,cr_item_sk,cr_order_number] + CometSort [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometExchange [cs_item_sk,cs_order_number] #2 + CometProject [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,p_promo_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_date] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,d_date_sk,d_date,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,hd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,cd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_sk,i_item_desc] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometFilter [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] #3 + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [i_item_sk,i_item_desc] #4 + CometBroadcastExchange [cd_demo_sk] #5 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_marital_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status] + CometBroadcastExchange [hd_demo_sk] #6 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential] + CometBroadcastExchange [d_date_sk,d_date,d_week_seq] #7 + CometProject [d_date_sk,d_date,d_week_seq] + CometFilter [d_date_sk,d_date,d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_week_seq,d_year] + CometBroadcastExchange [d_date_sk,d_week_seq] #8 + CometFilter [d_date_sk,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] + CometBroadcastExchange [d_date_sk,d_date] #9 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [p_promo_sk] #10 + CometFilter [p_promo_sk] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_item_sk,cr_order_number] #11 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..c413105439 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/explain.txt @@ -0,0 +1,369 @@ +== Physical Plan == +* ColumnarToRow (64) ++- CometTakeOrderedAndProject (63) + +- CometHashAggregate (62) + +- CometExchange (61) + +- CometHashAggregate (60) + +- CometProject (59) + +- CometSortMergeJoin (58) + :- CometSort (52) + : +- CometExchange (51) + : +- CometProject (50) + : +- CometBroadcastHashJoin (49) + : :- CometProject (45) + : : +- CometBroadcastHashJoin (44) + : : :- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (35) + : : : : +- CometBroadcastHashJoin (34) + : : : : :- CometProject (29) + : : : : : +- CometBroadcastHashJoin (28) + : : : : : :- CometProject (23) + : : : : : : +- CometBroadcastHashJoin (22) + : : : : : : :- CometProject (17) + : : : : : : : +- CometBroadcastHashJoin (16) + : : : : : : : :- CometProject (12) + : : : : : : : : +- CometBroadcastHashJoin (11) + : : : : : : : : :- CometProject (7) + : : : : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : : : : :- CometFilter (2) + : : : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : : : +- CometBroadcastExchange (5) + : : : : : : : : : +- CometFilter (4) + : : : : : : : : : +- CometScan parquet spark_catalog.default.inventory (3) + : : : : : : : : +- CometBroadcastExchange (10) + : : : : : : : : +- CometFilter (9) + : : : : : : : : +- CometScan parquet spark_catalog.default.warehouse (8) + : : : : : : : +- CometBroadcastExchange (15) + : : : : : : : +- CometFilter (14) + : : : : : : : +- CometScan parquet spark_catalog.default.item (13) + : : : : : : +- CometBroadcastExchange (21) + : : : : : : +- CometProject (20) + : : : : : : +- CometFilter (19) + : : : : : : +- CometScan parquet spark_catalog.default.customer_demographics (18) + : : : : : +- CometBroadcastExchange (27) + : : : : : +- CometProject (26) + : : : : : +- CometFilter (25) + : : : : : +- CometScan parquet spark_catalog.default.household_demographics (24) + : : : : +- CometBroadcastExchange (33) + : : : : +- CometProject (32) + : : : : +- CometFilter (31) + : : : : +- CometScan parquet spark_catalog.default.date_dim (30) + : : : +- CometBroadcastExchange (38) + : : : +- CometFilter (37) + : : : +- CometScan parquet spark_catalog.default.date_dim (36) + : : +- CometBroadcastExchange (43) + : : +- CometFilter (42) + : : +- CometScan parquet spark_catalog.default.date_dim (41) + : +- CometBroadcastExchange (48) + : +- CometFilter (47) + : +- CometScan parquet spark_catalog.default.promotion (46) + +- CometSort (57) + +- CometExchange (56) + +- CometProject (55) + +- CometFilter (54) + +- CometScan parquet spark_catalog.default.catalog_returns (53) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#12)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(5) CometBroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Right output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_item_sk#4], [inv_item_sk#9], Inner, (inv_quantity_on_hand#11 < cs_quantity#7), BuildRight + +(7) CometProject +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] + +(8) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(11) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Right output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [inv_warehouse_sk#10], [w_warehouse_sk#13], Inner, BuildRight + +(12) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] + +(13) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_desc#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [i_item_sk#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#15) + +(15) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [i_item_sk#15, i_item_desc#16] + +(16) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Right output [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [cs_item_sk#4], [i_item_sk#15], Inner, BuildRight + +(17) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(18) CometScan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = D)) AND isnotnull(cd_demo_sk#17)) + +(20) CometProject +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17], [cd_demo_sk#17] + +(21) CometBroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: [cd_demo_sk#17] + +(22) CometBroadcastHashJoin +Left output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [cd_demo_sk#17] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#17], Inner, BuildRight + +(23) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] +Arguments: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(24) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000 ), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(25) CometFilter +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = >10000 )) AND isnotnull(hd_demo_sk#19)) + +(26) CometProject +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19], [hd_demo_sk#19] + +(27) CometBroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: [hd_demo_sk#19] + +(28) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [hd_demo_sk#19] +Arguments: [cs_bill_hdemo_sk#3], [hd_demo_sk#19], Inner, BuildRight + +(29) CometProject +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(30) CometScan parquet spark_catalog.default.date_dim +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(31) CometFilter +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 1999)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(32) CometProject +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23], [d_date_sk#21, d_date#22, d_week_seq#23] + +(33) CometBroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23] + +(34) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_sold_date_sk#8], [d_date_sk#21], Inner, BuildRight + +(35) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(36) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#25, d_week_seq#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(37) CometFilter +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(38) CometBroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(39) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_week_seq#23, inv_date_sk#12], [d_week_seq#26, d_date_sk#25], Inner, BuildRight + +(40) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(41) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_date#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(42) CometFilter +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(43) CometBroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(44) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#27, d_date#28] +Arguments: [cs_ship_date_sk#1], [d_date_sk#27], Inner, (d_date#28 > date_add(d_date#22, 5)), BuildRight + +(45) CometProject +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] +Arguments: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(46) CometScan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(47) CometFilter +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(48) CometBroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(49) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [1]: [p_promo_sk#29] +Arguments: [cs_promo_sk#5], [p_promo_sk#29], LeftOuter, BuildRight + +(50) CometProject +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(51) CometExchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(52) CometSort +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST] + +(53) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(54) CometFilter +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(55) CometProject +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30, cr_order_number#31] + +(56) CometExchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(57) CometSort +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST] + +(58) CometSortMergeJoin +Left output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cs_item_sk#4, cs_order_number#6], [cr_item_sk#30, cr_order_number#31], LeftOuter + +(59) CometProject +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] +Arguments: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(60) CometHashAggregate +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] + +(61) CometExchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(62) CometHashAggregate +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] + +(63) CometTakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_cnt#36 DESC NULLS LAST,i_item_desc#16 ASC NULLS FIRST,w_warehouse_name#14 ASC NULLS FIRST,d_week_seq#23 ASC NULLS FIRST], output=[i_item_desc#16,w_warehouse_name#14,d_week_seq#23,no_promo#34,promo#35,total_cnt#36]), [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36], 100, [total_cnt#36 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + +(64) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..59c8e37099 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q72.native_iceberg_compat/simplified.txt @@ -0,0 +1,66 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt] + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt,count,count(1)] + CometExchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] + CometProject [w_warehouse_name,i_item_desc,d_week_seq] + CometSortMergeJoin [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,cr_item_sk,cr_order_number] + CometSort [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometExchange [cs_item_sk,cs_order_number] #2 + CometProject [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,p_promo_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_date] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,d_date_sk,d_date,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,hd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,cd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_sk,i_item_desc] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometFilter [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] #3 + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [i_item_sk,i_item_desc] #5 + CometFilter [i_item_sk,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + CometBroadcastExchange [cd_demo_sk] #6 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_marital_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + CometBroadcastExchange [hd_demo_sk] #7 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] + CometBroadcastExchange [d_date_sk,d_date,d_week_seq] #8 + CometProject [d_date_sk,d_date,d_week_seq] + CometFilter [d_date_sk,d_date,d_week_seq,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + CometBroadcastExchange [d_date_sk,d_week_seq] #9 + CometFilter [d_date_sk,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + CometBroadcastExchange [d_date_sk,d_date] #10 + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [p_promo_sk] #11 + CometFilter [p_promo_sk] + CometScan parquet spark_catalog.default.promotion [p_promo_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_item_sk,cr_order_number] #12 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/explain.txt new file mode 100644 index 0000000000..13f2c1319f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6, d_year#7, d_dom#8] + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9, s_county#10] + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : (s_county#10 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.0) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 1) AND (cnt#16 <= 5)) + +(25) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(cnt#16 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [cnt#16 DESC NULLS LAST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/simplified.txt new file mode 100644 index 0000000000..35a931bfdf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [cnt] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..da8c661251 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.store (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.household_demographics (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometScan parquet spark_catalog.default.customer (25) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_county, [Bronx County,Franklin Parish,Orange County,Williamson County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : (s_county#10 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometScan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.0) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 1) AND (cnt#16 <= 5)) + +(25) CometScan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(cnt#16 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [cnt#16 DESC NULLS LAST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..72ee19a917 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q73.native_iceberg_compat/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [cnt] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometScan parquet spark_catalog.default.store [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/explain.txt new file mode 100644 index 0000000000..f2a45c5e59 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +* ColumnarToRow (44) ++- CometTakeOrderedAndProject (43) + +- CometProject (42) + +- CometBroadcastHashJoin (41) + :- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (22) + : +- CometBroadcastExchange (35) + : +- CometFilter (34) + : +- CometHashAggregate (33) + : +- ReusedExchange (32) + +- CometBroadcastExchange (40) + +- CometHashAggregate (39) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(2) CometFilter +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(5) CometBroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Right output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#1], [ss_customer_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(9) CometFilter +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(11) CometBroadcastHashJoin +Left output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_year#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] + +(13) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] + +(14) CometExchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] + +(16) CometFilter +Input [2]: [customer_id#11, year_total#12] +Condition : (isnotnull(year_total#12) AND (year_total#12 > 0.00)) + +(17) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Arguments: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] + +(18) CometFilter +Input [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Condition : (isnotnull(c_customer_sk#13) AND isnotnull(c_customer_id#14)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] + +(20) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Right output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_sk#13], [ss_customer_sk#17], Inner, BuildRight + +(21) CometProject +Input [7]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16, ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] + +(22) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#20, d_year#21] +Arguments: [d_date_sk#20, d_year#21] + +(23) CometFilter +Input [2]: [d_date_sk#20, d_year#21] +Condition : (((isnotnull(d_year#21) AND (d_year#21 = 2002)) AND d_year#21 IN (2001,2002)) AND isnotnull(d_date_sk#20)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#20, d_year#21] +Arguments: [d_date_sk#20, d_year#21] + +(25) CometBroadcastHashJoin +Left output [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] +Right output [2]: [d_date_sk#20, d_year#21] +Arguments: [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + +(26) CometProject +Input [7]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19, d_date_sk#20, d_year#21] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] + +(27) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#18))] + +(28) CometExchange +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Arguments: hashpartitioning(c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [sum(UnscaledValue(ss_net_paid#18))] + +(30) CometBroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#11, year_total#12] +Right output [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#11], [customer_id#23], Inner, BuildRight + +(32) ReusedExchange [Reuses operator id: 14] +Output [5]: [c_customer_id#27, c_first_name#28, c_last_name#29, d_year#30, sum#31] + +(33) CometHashAggregate +Input [5]: [c_customer_id#27, c_first_name#28, c_last_name#29, d_year#30, sum#31] +Keys [4]: [c_customer_id#27, c_first_name#28, c_last_name#29, d_year#30] +Functions [1]: [sum(UnscaledValue(ws_net_paid#32))] + +(34) CometFilter +Input [2]: [customer_id#33, year_total#34] +Condition : (isnotnull(year_total#34) AND (year_total#34 > 0.00)) + +(35) CometBroadcastExchange +Input [2]: [customer_id#33, year_total#34] +Arguments: [customer_id#33, year_total#34] + +(36) CometBroadcastHashJoin +Left output [6]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Right output [2]: [customer_id#33, year_total#34] +Arguments: [customer_id#11], [customer_id#33], Inner, BuildRight + +(37) CometProject +Input [8]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#33, year_total#34] +Arguments: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34], [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34] + +(38) ReusedExchange [Reuses operator id: 28] +Output [5]: [c_customer_id#35, c_first_name#36, c_last_name#37, d_year#38, sum#39] + +(39) CometHashAggregate +Input [5]: [c_customer_id#35, c_first_name#36, c_last_name#37, d_year#38, sum#39] +Keys [4]: [c_customer_id#35, c_first_name#36, c_last_name#37, d_year#38] +Functions [1]: [sum(UnscaledValue(ws_net_paid#40))] + +(40) CometBroadcastExchange +Input [2]: [customer_id#41, year_total#42] +Arguments: [customer_id#41, year_total#42] + +(41) CometBroadcastHashJoin +Left output [7]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34] +Right output [2]: [customer_id#41, year_total#42] +Arguments: [customer_id#11], [customer_id#41], Inner, (CASE WHEN (year_total#34 > 0.00) THEN (year_total#42 / year_total#34) END > CASE WHEN (year_total#12 > 0.00) THEN (year_total#26 / year_total#12) END), BuildRight + +(42) CometProject +Input [9]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34, customer_id#41, year_total#42] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(43) CometTakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_id#23 ASC NULLS FIRST,customer_id#23 ASC NULLS FIRST,customer_id#23 ASC NULLS FIRST], output=[customer_id#23,customer_first_name#24,customer_last_name#25]), [customer_id#23, customer_first_name#24, customer_last_name#25], 100, [customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(44) ColumnarToRow [codegen id : 1] +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ea904874f9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_datafusion/simplified.txt @@ -0,0 +1,46 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + CometProject [customer_id,customer_first_name,customer_last_name] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [c_customer_id,c_first_name,c_last_name,d_year,sum] #1 + CometBroadcastExchange [customer_id,year_total] #8 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [c_customer_id,c_first_name,c_last_name,d_year,sum] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..68542d7d4a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/explain.txt @@ -0,0 +1,363 @@ +== Physical Plan == +* ColumnarToRow (64) ++- CometTakeOrderedAndProject (63) + +- CometProject (62) + +- CometBroadcastHashJoin (61) + :- CometProject (48) + : +- CometBroadcastHashJoin (47) + : :- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometScan parquet spark_catalog.default.customer (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometScan parquet spark_catalog.default.date_dim (22) + : +- CometBroadcastExchange (46) + : +- CometFilter (45) + : +- CometHashAggregate (44) + : +- CometExchange (43) + : +- CometHashAggregate (42) + : +- CometProject (41) + : +- CometBroadcastHashJoin (40) + : :- CometProject (38) + : : +- CometBroadcastHashJoin (37) + : : :- CometFilter (33) + : : : +- CometScan parquet spark_catalog.default.customer (32) + : : +- CometBroadcastExchange (36) + : : +- CometFilter (35) + : : +- CometScan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (39) + +- CometBroadcastExchange (60) + +- CometHashAggregate (59) + +- CometExchange (58) + +- CometHashAggregate (57) + +- CometProject (56) + +- CometBroadcastHashJoin (55) + :- CometProject (53) + : +- CometBroadcastHashJoin (52) + : :- CometFilter (50) + : : +- CometScan parquet spark_catalog.default.customer (49) + : +- ReusedExchange (51) + +- ReusedExchange (54) + + +(1) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(5) CometBroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Right output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#1], [ss_customer_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_year#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(11) CometBroadcastHashJoin +Left output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_year#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] + +(13) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] + +(14) CometExchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] + +(16) CometFilter +Input [2]: [customer_id#11, year_total#12] +Condition : (isnotnull(year_total#12) AND (year_total#12 > 0.00)) + +(17) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(18) CometFilter +Input [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Condition : (isnotnull(c_customer_sk#13) AND isnotnull(c_customer_id#14)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] + +(20) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Right output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_sk#13], [ss_customer_sk#17], Inner, BuildRight + +(21) CometProject +Input [7]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16, ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] + +(22) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#20, d_year#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) CometFilter +Input [2]: [d_date_sk#20, d_year#21] +Condition : (((isnotnull(d_year#21) AND (d_year#21 = 2002)) AND d_year#21 IN (2001,2002)) AND isnotnull(d_date_sk#20)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#20, d_year#21] +Arguments: [d_date_sk#20, d_year#21] + +(25) CometBroadcastHashJoin +Left output [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] +Right output [2]: [d_date_sk#20, d_year#21] +Arguments: [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + +(26) CometProject +Input [7]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19, d_date_sk#20, d_year#21] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] + +(27) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#18))] + +(28) CometExchange +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Arguments: hashpartitioning(c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [sum(UnscaledValue(ss_net_paid#18))] + +(30) CometBroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#11, year_total#12] +Right output [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#11], [customer_id#23], Inner, BuildRight + +(32) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(33) CometFilter +Input [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_customer_id#28)) + +(34) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) CometFilter +Input [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Condition : isnotnull(ws_bill_customer_sk#31) + +(36) CometBroadcastExchange +Input [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Arguments: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] + +(37) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Right output [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Arguments: [c_customer_sk#27], [ws_bill_customer_sk#31], Inner, BuildRight + +(38) CometProject +Input [7]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30, ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Arguments: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33], [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33] + +(39) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#34, d_year#35] + +(40) CometBroadcastHashJoin +Left output [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33] +Right output [2]: [d_date_sk#34, d_year#35] +Arguments: [ws_sold_date_sk#33], [d_date_sk#34], Inner, BuildRight + +(41) CometProject +Input [7]: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33, d_date_sk#34, d_year#35] +Arguments: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, d_year#35], [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, d_year#35] + +(42) CometHashAggregate +Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, d_year#35] +Keys [4]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#32))] + +(43) CometExchange +Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35, sum#36] +Arguments: hashpartitioning(c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35, sum#36] +Keys [4]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35] +Functions [1]: [sum(UnscaledValue(ws_net_paid#32))] + +(45) CometFilter +Input [2]: [customer_id#37, year_total#38] +Condition : (isnotnull(year_total#38) AND (year_total#38 > 0.00)) + +(46) CometBroadcastExchange +Input [2]: [customer_id#37, year_total#38] +Arguments: [customer_id#37, year_total#38] + +(47) CometBroadcastHashJoin +Left output [6]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Right output [2]: [customer_id#37, year_total#38] +Arguments: [customer_id#11], [customer_id#37], Inner, BuildRight + +(48) CometProject +Input [8]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#37, year_total#38] +Arguments: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38], [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38] + +(49) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(50) CometFilter +Input [4]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42] +Condition : (isnotnull(c_customer_sk#39) AND isnotnull(c_customer_id#40)) + +(51) ReusedExchange [Reuses operator id: 36] +Output [3]: [ws_bill_customer_sk#43, ws_net_paid#44, ws_sold_date_sk#45] + +(52) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42] +Right output [3]: [ws_bill_customer_sk#43, ws_net_paid#44, ws_sold_date_sk#45] +Arguments: [c_customer_sk#39], [ws_bill_customer_sk#43], Inner, BuildRight + +(53) CometProject +Input [7]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, ws_bill_customer_sk#43, ws_net_paid#44, ws_sold_date_sk#45] +Arguments: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45], [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45] + +(54) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#46, d_year#47] + +(55) CometBroadcastHashJoin +Left output [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45] +Right output [2]: [d_date_sk#46, d_year#47] +Arguments: [ws_sold_date_sk#45], [d_date_sk#46], Inner, BuildRight + +(56) CometProject +Input [7]: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45, d_date_sk#46, d_year#47] +Arguments: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, d_year#47], [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, d_year#47] + +(57) CometHashAggregate +Input [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, d_year#47] +Keys [4]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#44))] + +(58) CometExchange +Input [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47, sum#48] +Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47, sum#48] +Keys [4]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47] +Functions [1]: [sum(UnscaledValue(ws_net_paid#44))] + +(60) CometBroadcastExchange +Input [2]: [customer_id#49, year_total#50] +Arguments: [customer_id#49, year_total#50] + +(61) CometBroadcastHashJoin +Left output [7]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38] +Right output [2]: [customer_id#49, year_total#50] +Arguments: [customer_id#11], [customer_id#49], Inner, (CASE WHEN (year_total#38 > 0.00) THEN (year_total#50 / year_total#38) END > CASE WHEN (year_total#12 > 0.00) THEN (year_total#26 / year_total#12) END), BuildRight + +(62) CometProject +Input [9]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38, customer_id#49, year_total#50] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(63) CometTakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_id#23 ASC NULLS FIRST,customer_id#23 ASC NULLS FIRST,customer_id#23 ASC NULLS FIRST], output=[customer_id#23,customer_first_name#24,customer_last_name#25]), [customer_id#23, customer_first_name#24, customer_last_name#25], 100, [customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(64) ColumnarToRow [codegen id : 1] +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1d3e33a2fb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q74.native_iceberg_compat/simplified.txt @@ -0,0 +1,66 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + CometProject [customer_id,customer_first_name,customer_last_name] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ws_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + CometFilter [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #10 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ws_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/explain.txt new file mode 100644 index 0000000000..093bb56034 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/explain.txt @@ -0,0 +1,443 @@ +== Physical Plan == +* ColumnarToRow (86) ++- CometTakeOrderedAndProject (85) + +- CometProject (84) + +- CometSortMergeJoin (83) + :- CometSort (44) + : +- CometExchange (43) + : +- CometFilter (42) + : +- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometUnion (35) + : :- CometProject (22) + : : +- CometSortMergeJoin (21) + : : :- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : : +- CometSort (20) + : : +- CometExchange (19) + : : +- CometProject (18) + : : +- CometFilter (17) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (16) + : :- CometProject (28) + : : +- CometSortMergeJoin (27) + : : :- CometSort (24) + : : : +- ReusedExchange (23) + : : +- CometSort (26) + : : +- ReusedExchange (25) + : +- CometProject (34) + : +- CometSortMergeJoin (33) + : :- CometSort (30) + : : +- ReusedExchange (29) + : +- CometSort (32) + : +- ReusedExchange (31) + +- CometSort (82) + +- CometExchange (81) + +- CometFilter (80) + +- CometHashAggregate (79) + +- CometExchange (78) + +- CometHashAggregate (77) + +- CometHashAggregate (76) + +- CometExchange (75) + +- CometHashAggregate (74) + +- CometUnion (73) + :- CometProject (60) + : +- CometSortMergeJoin (59) + : :- CometSort (56) + : : +- CometExchange (55) + : : +- CometProject (54) + : : +- CometBroadcastHashJoin (53) + : : :- CometProject (49) + : : : +- CometBroadcastHashJoin (48) + : : : :- CometFilter (46) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (45) + : : : +- ReusedExchange (47) + : : +- CometBroadcastExchange (52) + : : +- CometFilter (51) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (50) + : +- CometSort (58) + : +- ReusedExchange (57) + :- CometProject (66) + : +- CometSortMergeJoin (65) + : :- CometSort (62) + : : +- ReusedExchange (61) + : +- CometSort (64) + : +- ReusedExchange (63) + +- CometProject (72) + +- CometSortMergeJoin (71) + :- CometSort (68) + : +- ReusedExchange (67) + +- CometSort (70) + +- ReusedExchange (69) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(4) CometFilter +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(5) CometProject +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(6) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(7) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Right output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(8) CometProject +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(10) CometFilter +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(12) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right output [2]: [d_date_sk#12, d_year#13] +Arguments: [cs_sold_date_sk#5], [d_date_sk#12], Inner, BuildRight + +(13) CometProject +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] + +(14) CometExchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometSort +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(17) CometFilter +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(18) CometProject +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(19) CometExchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Right output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cs_order_number#2, cs_item_sk#1], [cr_order_number#15, cr_item_sk#14], LeftOuter + +(22) CometProject +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20], [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] + +(23) ReusedExchange [Reuses operator id: 14] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] + +(24) CometSort +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29], [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST] + +(25) ReusedExchange [Reuses operator id: 19] +Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(26) CometSort +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33], [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST] + +(27) CometSortMergeJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] +Right output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [ss_ticket_number#22, ss_item_sk#21], [sr_ticket_number#31, sr_item_sk#30], LeftOuter + +(28) CometProject +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, sales_cnt#34, sales_amt#35], [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, (ss_quantity#23 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#34, (ss_ext_sales_price#24 - coalesce(sr_return_amt#33, 0.00)) AS sales_amt#35] + +(29) ReusedExchange [Reuses operator id: 14] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] + +(30) CometSort +Input [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] +Arguments: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44], [ws_order_number#37 ASC NULLS FIRST, ws_item_sk#36 ASC NULLS FIRST] + +(31) ReusedExchange [Reuses operator id: 19] +Output [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] + +(32) CometSort +Input [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48], [wr_order_number#46 ASC NULLS FIRST, wr_item_sk#45 ASC NULLS FIRST] + +(33) CometSortMergeJoin +Left output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] +Right output [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [ws_order_number#37, ws_item_sk#36], [wr_order_number#46, wr_item_sk#45], LeftOuter + +(34) CometProject +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44, wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, sales_cnt#49, sales_amt#50], [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, (ws_quantity#38 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#49, (ws_ext_sales_price#39 - coalesce(wr_return_amt#48, 0.00)) AS sales_amt#50] + +(35) CometUnion +Child 0 Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, sales_cnt#34, sales_amt#35] +Child 2 Input [7]: [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, sales_cnt#49, sales_amt#50] + +(36) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(37) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(38) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(39) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(40) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(41) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(42) CometFilter +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Condition : isnotnull(sales_cnt#53) + +(43) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(44) CometSort +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54], [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST] + +(45) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] + +(46) CometFilter +Input [5]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] +Condition : isnotnull(cs_item_sk#55) + +(47) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] + +(48) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] +Right output [5]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Arguments: [cs_item_sk#55], [i_item_sk#60], Inner, BuildRight + +(49) CometProject +Input [10]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64], [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] + +(50) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#65, d_year#66] +Arguments: [d_date_sk#65, d_year#66] + +(51) CometFilter +Input [2]: [d_date_sk#65, d_year#66] +Condition : ((isnotnull(d_year#66) AND (d_year#66 = 2001)) AND isnotnull(d_date_sk#65)) + +(52) CometBroadcastExchange +Input [2]: [d_date_sk#65, d_year#66] +Arguments: [d_date_sk#65, d_year#66] + +(53) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Right output [2]: [d_date_sk#65, d_year#66] +Arguments: [cs_sold_date_sk#59], [d_date_sk#65], Inner, BuildRight + +(54) CometProject +Input [11]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_date_sk#65, d_year#66] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66], [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] + +(55) CometExchange +Input [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] +Arguments: hashpartitioning(cs_order_number#56, cs_item_sk#55, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(56) CometSort +Input [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66], [cs_order_number#56 ASC NULLS FIRST, cs_item_sk#55 ASC NULLS FIRST] + +(57) ReusedExchange [Reuses operator id: 19] +Output [4]: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] + +(58) CometSort +Input [4]: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] +Arguments: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70], [cr_order_number#68 ASC NULLS FIRST, cr_item_sk#67 ASC NULLS FIRST] + +(59) CometSortMergeJoin +Left output [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] +Right output [4]: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] +Arguments: [cs_order_number#56, cs_item_sk#55], [cr_order_number#68, cr_item_sk#67], LeftOuter + +(60) CometProject +Input [13]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66, cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] +Arguments: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20], [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, (cs_quantity#57 - coalesce(cr_return_quantity#69, 0)) AS sales_cnt#19, (cs_ext_sales_price#58 - coalesce(cr_return_amount#70, 0.00)) AS sales_amt#20] + +(61) ReusedExchange [Reuses operator id: 55] +Output [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79] + +(62) CometSort +Input [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79] +Arguments: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79], [ss_ticket_number#72 ASC NULLS FIRST, ss_item_sk#71 ASC NULLS FIRST] + +(63) ReusedExchange [Reuses operator id: 19] +Output [4]: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] + +(64) CometSort +Input [4]: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] +Arguments: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83], [sr_ticket_number#81 ASC NULLS FIRST, sr_item_sk#80 ASC NULLS FIRST] + +(65) CometSortMergeJoin +Left output [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79] +Right output [4]: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] +Arguments: [ss_ticket_number#72, ss_item_sk#71], [sr_ticket_number#81, sr_item_sk#80], LeftOuter + +(66) CometProject +Input [13]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79, sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] +Arguments: [d_year#79, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#34, sales_amt#35], [d_year#79, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, (ss_quantity#73 - coalesce(sr_return_quantity#82, 0)) AS sales_cnt#34, (ss_ext_sales_price#74 - coalesce(sr_return_amt#83, 0.00)) AS sales_amt#35] + +(67) ReusedExchange [Reuses operator id: 55] +Output [9]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92] + +(68) CometSort +Input [9]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92] +Arguments: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92], [ws_order_number#85 ASC NULLS FIRST, ws_item_sk#84 ASC NULLS FIRST] + +(69) ReusedExchange [Reuses operator id: 19] +Output [4]: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] + +(70) CometSort +Input [4]: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] +Arguments: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96], [wr_order_number#94 ASC NULLS FIRST, wr_item_sk#93 ASC NULLS FIRST] + +(71) CometSortMergeJoin +Left output [9]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92] +Right output [4]: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] +Arguments: [ws_order_number#85, ws_item_sk#84], [wr_order_number#94, wr_item_sk#93], LeftOuter + +(72) CometProject +Input [13]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92, wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] +Arguments: [d_year#92, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, sales_cnt#49, sales_amt#50], [d_year#92, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, (ws_quantity#86 - coalesce(wr_return_quantity#95, 0)) AS sales_cnt#49, (ws_ext_sales_price#87 - coalesce(wr_return_amt#96, 0.00)) AS sales_amt#50] + +(73) CometUnion +Child 0 Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#79, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#34, sales_amt#35] +Child 2 Input [7]: [d_year#92, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, sales_cnt#49, sales_amt#50] + +(74) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Functions: [] + +(75) CometExchange +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(76) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Functions: [] + +(77) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(78) CometExchange +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sum#51, sum#97] +Arguments: hashpartitioning(d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(79) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sum#51, sum#97] +Keys [5]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(80) CometFilter +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Condition : isnotnull(sales_cnt#98) + +(81) CometExchange +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(82) CometSort +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99], [i_brand_id#61 ASC NULLS FIRST, i_class_id#62 ASC NULLS FIRST, i_category_id#63 ASC NULLS FIRST, i_manufact_id#64 ASC NULLS FIRST] + +(83) CometSortMergeJoin +Left output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Right output [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64], Inner, ((cast(sales_cnt#53 as decimal(17,2)) / cast(sales_cnt#98 as decimal(17,2))) < 0.90000000000000000000) + +(84) CometProject +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54, d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105], [d_year#66 AS prev_year#100, d_year#13 AS year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#98 AS prev_yr_cnt#102, sales_cnt#53 AS curr_yr_cnt#103, (sales_cnt#53 - sales_cnt#98) AS sales_cnt_diff#104, (sales_amt#54 - sales_amt#99) AS sales_amt_diff#105] + +(85) CometTakeOrderedAndProject +Input [10]: [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#104 ASC NULLS FIRST], output=[prev_year#100,year#101,i_brand_id#7,i_class_id#8,i_category_id#9,i_manufact_id#11,prev_yr_cnt#102,curr_yr_cnt#103,sales_cnt_diff#104,sales_amt_diff#105]), [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105], 100, [sales_cnt_diff#104 ASC NULLS FIRST], [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105] + +(86) ColumnarToRow [codegen id : 1] +Input [10]: [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6503d71685 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_datafusion/simplified.txt @@ -0,0 +1,88 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometProject [d_year,d_year,sales_cnt,sales_cnt,sales_amt,sales_amt] [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometSortMergeJoin [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #4 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometProject [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometExchange [cr_order_number,cr_item_sk] #7 + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #4 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #4 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #8 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #11 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometBroadcastExchange [d_date_sk,d_year] #12 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #11 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #11 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..69a6015121 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/explain.txt @@ -0,0 +1,681 @@ +== Physical Plan == +* ColumnarToRow (124) ++- CometTakeOrderedAndProject (123) + +- CometProject (122) + +- CometSortMergeJoin (121) + :- CometSort (66) + : +- CometExchange (65) + : +- CometFilter (64) + : +- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometHashAggregate (60) + : +- CometExchange (59) + : +- CometHashAggregate (58) + : +- CometUnion (57) + : :- CometProject (22) + : : +- CometSortMergeJoin (21) + : : :- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.item (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.date_dim (9) + : : +- CometSort (20) + : : +- CometExchange (19) + : : +- CometProject (18) + : : +- CometFilter (17) + : : +- CometScan parquet spark_catalog.default.catalog_returns (16) + : :- CometProject (39) + : : +- CometSortMergeJoin (38) + : : :- CometSort (32) + : : : +- CometExchange (31) + : : : +- CometProject (30) + : : : +- CometBroadcastHashJoin (29) + : : : :- CometProject (27) + : : : : +- CometBroadcastHashJoin (26) + : : : : :- CometFilter (24) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (23) + : : : : +- ReusedExchange (25) + : : : +- ReusedExchange (28) + : : +- CometSort (37) + : : +- CometExchange (36) + : : +- CometProject (35) + : : +- CometFilter (34) + : : +- CometScan parquet spark_catalog.default.store_returns (33) + : +- CometProject (56) + : +- CometSortMergeJoin (55) + : :- CometSort (49) + : : +- CometExchange (48) + : : +- CometProject (47) + : : +- CometBroadcastHashJoin (46) + : : :- CometProject (44) + : : : +- CometBroadcastHashJoin (43) + : : : :- CometFilter (41) + : : : : +- CometScan parquet spark_catalog.default.web_sales (40) + : : : +- ReusedExchange (42) + : : +- ReusedExchange (45) + : +- CometSort (54) + : +- CometExchange (53) + : +- CometProject (52) + : +- CometFilter (51) + : +- CometScan parquet spark_catalog.default.web_returns (50) + +- CometSort (120) + +- CometExchange (119) + +- CometFilter (118) + +- CometHashAggregate (117) + +- CometExchange (116) + +- CometHashAggregate (115) + +- CometHashAggregate (114) + +- CometExchange (113) + +- CometHashAggregate (112) + +- CometUnion (111) + :- CometProject (82) + : +- CometSortMergeJoin (81) + : :- CometSort (78) + : : +- CometExchange (77) + : : +- CometProject (76) + : : +- CometBroadcastHashJoin (75) + : : :- CometProject (71) + : : : +- CometBroadcastHashJoin (70) + : : : :- CometFilter (68) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (67) + : : : +- ReusedExchange (69) + : : +- CometBroadcastExchange (74) + : : +- CometFilter (73) + : : +- CometScan parquet spark_catalog.default.date_dim (72) + : +- CometSort (80) + : +- ReusedExchange (79) + :- CometProject (96) + : +- CometSortMergeJoin (95) + : :- CometSort (92) + : : +- CometExchange (91) + : : +- CometProject (90) + : : +- CometBroadcastHashJoin (89) + : : :- CometProject (87) + : : : +- CometBroadcastHashJoin (86) + : : : :- CometFilter (84) + : : : : +- CometScan parquet spark_catalog.default.store_sales (83) + : : : +- ReusedExchange (85) + : : +- ReusedExchange (88) + : +- CometSort (94) + : +- ReusedExchange (93) + +- CometProject (110) + +- CometSortMergeJoin (109) + :- CometSort (106) + : +- CometExchange (105) + : +- CometProject (104) + : +- CometBroadcastHashJoin (103) + : :- CometProject (101) + : : +- CometBroadcastHashJoin (100) + : : :- CometFilter (98) + : : : +- CometScan parquet spark_catalog.default.web_sales (97) + : : +- ReusedExchange (99) + : +- ReusedExchange (102) + +- CometSort (108) + +- ReusedExchange (107) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books ), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(5) CometProject +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(6) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(7) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Right output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(8) CometProject +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(12) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right output [2]: [d_date_sk#12, d_year#13] +Arguments: [cs_sold_date_sk#5], [d_date_sk#12], Inner, BuildRight + +(13) CometProject +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] + +(14) CometExchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometSort +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST] + +(16) CometScan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(17) CometFilter +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(18) CometProject +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(19) CometExchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Right output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cs_order_number#2, cs_item_sk#1], [cr_order_number#15, cr_item_sk#14], LeftOuter + +(22) CometProject +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20], [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] + +(23) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#25)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(24) CometFilter +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : isnotnull(ss_item_sk#21) + +(25) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(26) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Right output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Arguments: [ss_item_sk#21], [i_item_sk#26], Inner, BuildRight + +(27) CometProject +Input [10]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30], [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(28) ReusedExchange [Reuses operator id: 11] +Output [2]: [d_date_sk#31, d_year#32] + +(29) CometBroadcastHashJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Right output [2]: [d_date_sk#31, d_year#32] +Arguments: [ss_sold_date_sk#25], [d_date_sk#31], Inner, BuildRight + +(30) CometProject +Input [11]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_date_sk#31, d_year#32] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32], [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] + +(31) CometExchange +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: hashpartitioning(ss_ticket_number#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32], [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST] + +(33) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Condition : (isnotnull(sr_ticket_number#34) AND isnotnull(sr_item_sk#33)) + +(35) CometProject +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Arguments: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36], [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] + +(36) CometExchange +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: hashpartitioning(sr_ticket_number#34, sr_item_sk#33, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(37) CometSort +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36], [sr_ticket_number#34 ASC NULLS FIRST, sr_item_sk#33 ASC NULLS FIRST] + +(38) CometSortMergeJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Right output [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [ss_ticket_number#22, ss_item_sk#21], [sr_ticket_number#34, sr_item_sk#33], LeftOuter + +(39) CometProject +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32, sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, sales_cnt#38, sales_amt#39], [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, (ss_quantity#23 - coalesce(sr_return_quantity#35, 0)) AS sales_cnt#38, (ss_ext_sales_price#24 - coalesce(sr_return_amt#36, 0.00)) AS sales_amt#39] + +(40) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#44)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) CometFilter +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_item_sk#40) + +(42) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(43) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Right output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Arguments: [ws_item_sk#40], [i_item_sk#45], Inner, BuildRight + +(44) CometProject +Input [10]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Arguments: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49], [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(45) ReusedExchange [Reuses operator id: 11] +Output [2]: [d_date_sk#50, d_year#51] + +(46) CometBroadcastHashJoin +Left output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Right output [2]: [d_date_sk#50, d_year#51] +Arguments: [ws_sold_date_sk#44], [d_date_sk#50], Inner, BuildRight + +(47) CometProject +Input [11]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_date_sk#50, d_year#51] +Arguments: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51], [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] + +(48) CometExchange +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: hashpartitioning(ws_order_number#41, ws_item_sk#40, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(49) CometSort +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51], [ws_order_number#41 ASC NULLS FIRST, ws_item_sk#40 ASC NULLS FIRST] + +(50) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(51) CometFilter +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Condition : (isnotnull(wr_order_number#53) AND isnotnull(wr_item_sk#52)) + +(52) CometProject +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Arguments: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55], [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] + +(53) CometExchange +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: hashpartitioning(wr_order_number#53, wr_item_sk#52, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(54) CometSort +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55], [wr_order_number#53 ASC NULLS FIRST, wr_item_sk#52 ASC NULLS FIRST] + +(55) CometSortMergeJoin +Left output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Right output [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [ws_order_number#41, ws_item_sk#40], [wr_order_number#53, wr_item_sk#52], LeftOuter + +(56) CometProject +Input [13]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51, wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, sales_cnt#57, sales_amt#58], [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, (ws_quantity#42 - coalesce(wr_return_quantity#54, 0)) AS sales_cnt#57, (ws_ext_sales_price#43 - coalesce(wr_return_amt#55, 0.00)) AS sales_amt#58] + +(57) CometUnion +Child 0 Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, sales_cnt#38, sales_amt#39] +Child 2 Input [7]: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, sales_cnt#57, sales_amt#58] + +(58) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(59) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(60) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(61) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(62) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#59, sum#60] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(63) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#59, sum#60] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(64) CometFilter +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Condition : isnotnull(sales_cnt#61) + +(65) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(66) CometSort +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62], [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST] + +(67) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#67)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(68) CometFilter +Input [5]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67] +Condition : isnotnull(cs_item_sk#63) + +(69) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] + +(70) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67] +Right output [5]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Arguments: [cs_item_sk#63], [i_item_sk#68], Inner, BuildRight + +(71) CometProject +Input [10]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Arguments: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72], [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] + +(72) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#73, d_year#74] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(73) CometFilter +Input [2]: [d_date_sk#73, d_year#74] +Condition : ((isnotnull(d_year#74) AND (d_year#74 = 2001)) AND isnotnull(d_date_sk#73)) + +(74) CometBroadcastExchange +Input [2]: [d_date_sk#73, d_year#74] +Arguments: [d_date_sk#73, d_year#74] + +(75) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Right output [2]: [d_date_sk#73, d_year#74] +Arguments: [cs_sold_date_sk#67], [d_date_sk#73], Inner, BuildRight + +(76) CometProject +Input [11]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_date_sk#73, d_year#74] +Arguments: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74], [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] + +(77) CometExchange +Input [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] +Arguments: hashpartitioning(cs_order_number#64, cs_item_sk#63, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=10] + +(78) CometSort +Input [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] +Arguments: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74], [cs_order_number#64 ASC NULLS FIRST, cs_item_sk#63 ASC NULLS FIRST] + +(79) ReusedExchange [Reuses operator id: 19] +Output [4]: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] + +(80) CometSort +Input [4]: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] +Arguments: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78], [cr_order_number#76 ASC NULLS FIRST, cr_item_sk#75 ASC NULLS FIRST] + +(81) CometSortMergeJoin +Left output [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] +Right output [4]: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] +Arguments: [cs_order_number#64, cs_item_sk#63], [cr_order_number#76, cr_item_sk#75], LeftOuter + +(82) CometProject +Input [13]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74, cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] +Arguments: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20], [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, (cs_quantity#65 - coalesce(cr_return_quantity#77, 0)) AS sales_cnt#19, (cs_ext_sales_price#66 - coalesce(cr_return_amount#78, 0.00)) AS sales_amt#20] + +(83) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#83)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(84) CometFilter +Input [5]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83] +Condition : isnotnull(ss_item_sk#79) + +(85) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] + +(86) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83] +Right output [5]: [i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Arguments: [ss_item_sk#79], [i_item_sk#84], Inner, BuildRight + +(87) CometProject +Input [10]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Arguments: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88], [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] + +(88) ReusedExchange [Reuses operator id: 74] +Output [2]: [d_date_sk#89, d_year#90] + +(89) CometBroadcastHashJoin +Left output [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Right output [2]: [d_date_sk#89, d_year#90] +Arguments: [ss_sold_date_sk#83], [d_date_sk#89], Inner, BuildRight + +(90) CometProject +Input [11]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_date_sk#89, d_year#90] +Arguments: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90], [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] + +(91) CometExchange +Input [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] +Arguments: hashpartitioning(ss_ticket_number#80, ss_item_sk#79, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=11] + +(92) CometSort +Input [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] +Arguments: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90], [ss_ticket_number#80 ASC NULLS FIRST, ss_item_sk#79 ASC NULLS FIRST] + +(93) ReusedExchange [Reuses operator id: 36] +Output [4]: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] + +(94) CometSort +Input [4]: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] +Arguments: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94], [sr_ticket_number#92 ASC NULLS FIRST, sr_item_sk#91 ASC NULLS FIRST] + +(95) CometSortMergeJoin +Left output [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] +Right output [4]: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] +Arguments: [ss_ticket_number#80, ss_item_sk#79], [sr_ticket_number#92, sr_item_sk#91], LeftOuter + +(96) CometProject +Input [13]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90, sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] +Arguments: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#38, sales_amt#39], [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (ss_quantity#81 - coalesce(sr_return_quantity#93, 0)) AS sales_cnt#38, (ss_ext_sales_price#82 - coalesce(sr_return_amt#94, 0.00)) AS sales_amt#39] + +(97) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#99)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(98) CometFilter +Input [5]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99] +Condition : isnotnull(ws_item_sk#95) + +(99) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#100, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] + +(100) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99] +Right output [5]: [i_item_sk#100, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] +Arguments: [ws_item_sk#95], [i_item_sk#100], Inner, BuildRight + +(101) CometProject +Input [10]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_item_sk#100, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] +Arguments: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104], [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] + +(102) ReusedExchange [Reuses operator id: 74] +Output [2]: [d_date_sk#105, d_year#106] + +(103) CometBroadcastHashJoin +Left output [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] +Right output [2]: [d_date_sk#105, d_year#106] +Arguments: [ws_sold_date_sk#99], [d_date_sk#105], Inner, BuildRight + +(104) CometProject +Input [11]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_date_sk#105, d_year#106] +Arguments: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106], [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] + +(105) CometExchange +Input [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] +Arguments: hashpartitioning(ws_order_number#96, ws_item_sk#95, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(106) CometSort +Input [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] +Arguments: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106], [ws_order_number#96 ASC NULLS FIRST, ws_item_sk#95 ASC NULLS FIRST] + +(107) ReusedExchange [Reuses operator id: 53] +Output [4]: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] + +(108) CometSort +Input [4]: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] +Arguments: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110], [wr_order_number#108 ASC NULLS FIRST, wr_item_sk#107 ASC NULLS FIRST] + +(109) CometSortMergeJoin +Left output [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] +Right output [4]: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] +Arguments: [ws_order_number#96, ws_item_sk#95], [wr_order_number#108, wr_item_sk#107], LeftOuter + +(110) CometProject +Input [13]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106, wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] +Arguments: [d_year#106, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, sales_cnt#57, sales_amt#58], [d_year#106, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, (ws_quantity#97 - coalesce(wr_return_quantity#109, 0)) AS sales_cnt#57, (ws_ext_sales_price#98 - coalesce(wr_return_amt#110, 0.00)) AS sales_amt#58] + +(111) CometUnion +Child 0 Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#38, sales_amt#39] +Child 2 Input [7]: [d_year#106, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, sales_cnt#57, sales_amt#58] + +(112) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Functions: [] + +(113) CometExchange +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=13] + +(114) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Functions: [] + +(115) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(116) CometExchange +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sum#59, sum#111] +Arguments: hashpartitioning(d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=14] + +(117) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sum#59, sum#111] +Keys [5]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(118) CometFilter +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Condition : isnotnull(sales_cnt#112) + +(119) CometExchange +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=15] + +(120) CometSort +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113], [i_brand_id#69 ASC NULLS FIRST, i_class_id#70 ASC NULLS FIRST, i_category_id#71 ASC NULLS FIRST, i_manufact_id#72 ASC NULLS FIRST] + +(121) CometSortMergeJoin +Left output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Right output [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72], Inner, ((cast(sales_cnt#61 as decimal(17,2)) / cast(sales_cnt#112 as decimal(17,2))) < 0.90000000000000000000) + +(122) CometProject +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62, d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119], [d_year#74 AS prev_year#114, d_year#13 AS year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#112 AS prev_yr_cnt#116, sales_cnt#61 AS curr_yr_cnt#117, (sales_cnt#61 - sales_cnt#112) AS sales_cnt_diff#118, (sales_amt#62 - sales_amt#113) AS sales_amt_diff#119] + +(123) CometTakeOrderedAndProject +Input [10]: [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#118 ASC NULLS FIRST], output=[prev_year#114,year#115,i_brand_id#7,i_class_id#8,i_category_id#9,i_manufact_id#11,prev_yr_cnt#116,curr_yr_cnt#117,sales_cnt_diff#118,sales_amt_diff#119]), [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119], 100, [sales_cnt_diff#118 ASC NULLS FIRST], [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119] + +(124) ColumnarToRow [codegen id : 1] +Input [10]: [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2c0b850895 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q75.native_iceberg_compat/simplified.txt @@ -0,0 +1,126 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometProject [d_year,d_year,sales_cnt,sales_cnt,sales_amt,sales_amt] [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometSortMergeJoin [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #4 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometProject [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometExchange [cr_order_number,cr_item_sk] #7 + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ss_ticket_number,ss_item_sk] #8 + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #6 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometExchange [sr_ticket_number,sr_item_sk] #9 + CometProject [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ws_order_number,ws_item_sk] #10 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #6 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometExchange [wr_order_number,wr_item_sk] #11 + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #15 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometBroadcastExchange [d_date_sk,d_year] #16 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ss_ticket_number,ss_item_sk] #17 + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #16 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ws_order_number,ws_item_sk] #18 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #16 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/explain.txt new file mode 100644 index 0000000000..635e8158a2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/explain.txt @@ -0,0 +1,177 @@ +== Physical Plan == +* ColumnarToRow (34) ++- CometTakeOrderedAndProject (33) + +- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometUnion (29) + :- CometProject (12) + : +- CometBroadcastHashJoin (11) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : +- CometBroadcastExchange (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + :- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : +- ReusedExchange (15) + : +- ReusedExchange (18) + +- CometProject (28) + +- CometBroadcastHashJoin (27) + :- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometFilter (22) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (21) + : +- ReusedExchange (23) + +- ReusedExchange (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#5, i_category#6] +Arguments: [i_item_sk#5, i_category#6] + +(4) CometFilter +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: [i_item_sk#5, i_category#6] + +(6) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [2]: [i_item_sk#5, i_category#6] +Arguments: [ss_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_item_sk#5, i_category#6] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6], [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [d_date_sk#7, d_year#8, d_qoy#9] + +(9) CometFilter +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : isnotnull(d_date_sk#7) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [d_date_sk#7, d_year#8, d_qoy#9] + +(11) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] +Right output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [ss_sold_date_sk#4], [d_date_sk#7], Inner, BuildRight + +(12) CometProject +Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12], [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#8, d_qoy#9, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#12] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Arguments: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] + +(14) CometFilter +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) + +(15) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#17, i_category#18] + +(16) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Right output [2]: [i_item_sk#17, i_category#18] +Arguments: [ws_item_sk#13], [i_item_sk#17], Inner, BuildRight + +(17) CometProject +Input [6]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_item_sk#17, i_category#18] +Arguments: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18], [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] + +(18) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#19, d_year#20, d_qoy#21] + +(19) CometBroadcastHashJoin +Left output [4]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] +Right output [3]: [d_date_sk#19, d_year#20, d_qoy#21] +Arguments: [ws_sold_date_sk#16], [d_date_sk#19], Inner, BuildRight + +(20) CometProject +Input [7]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18, d_date_sk#19, d_year#20, d_qoy#21] +Arguments: [channel#22, col_name#23, d_year#20, d_qoy#21, i_category#18, ext_sales_price#24], [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#20, d_qoy#21, i_category#18, ws_ext_sales_price#15 AS ext_sales_price#24] + +(21) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Arguments: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] + +(22) CometFilter +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) + +(23) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#29, i_category#30] + +(24) CometBroadcastHashJoin +Left output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Right output [2]: [i_item_sk#29, i_category#30] +Arguments: [cs_item_sk#26], [i_item_sk#29], Inner, BuildRight + +(25) CometProject +Input [6]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, i_item_sk#29, i_category#30] +Arguments: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30], [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] + +(26) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#31, d_year#32, d_qoy#33] + +(27) CometBroadcastHashJoin +Left output [4]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] +Right output [3]: [d_date_sk#31, d_year#32, d_qoy#33] +Arguments: [cs_sold_date_sk#28], [d_date_sk#31], Inner, BuildRight + +(28) CometProject +Input [7]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30, d_date_sk#31, d_year#32, d_qoy#33] +Arguments: [channel#34, col_name#35, d_year#32, d_qoy#33, i_category#30, ext_sales_price#36], [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#32, d_qoy#33, i_category#30, cs_ext_sales_price#27 AS ext_sales_price#36] + +(29) CometUnion +Child 0 Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Child 1 Input [6]: [channel#22, col_name#23, d_year#20, d_qoy#21, i_category#18, ext_sales_price#24] +Child 2 Input [6]: [channel#34, col_name#35, d_year#32, d_qoy#33, i_category#30, ext_sales_price#36] + +(30) CometHashAggregate +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] + +(31) CometExchange +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#37, sum#38] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#37, sum#38] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] + +(33) CometTakeOrderedAndProject +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[channel#10 ASC NULLS FIRST,col_name#11 ASC NULLS FIRST,d_year#8 ASC NULLS FIRST,d_qoy#9 ASC NULLS FIRST,i_category#6 ASC NULLS FIRST], output=[channel#10,col_name#11,d_year#8,d_qoy#9,i_category#6,sales_cnt#39,sales_amt#40]), [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40], 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#9 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40] + +(34) ColumnarToRow [codegen id : 1] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0b641cea7d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_datafusion/simplified.txt @@ -0,0 +1,36 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] + CometHashAggregate [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt,count,sum,count(1),sum(UnscaledValue(ext_sales_price))] + CometExchange [channel,col_name,d_year,d_qoy,i_category] #1 + CometHashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum,ext_sales_price] + CometUnion [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometProject [ss_store_sk,ss_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_category] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_category] #2 + CometFilter [i_item_sk,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_category] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #3 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometProject [ws_ship_customer_sk,ws_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_category] + CometFilter [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + CometProject [cs_ship_addr_sk,cs_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_category] + CometFilter [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..a2a17ccfce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/explain.txt @@ -0,0 +1,195 @@ +== Physical Plan == +* ColumnarToRow (34) ++- CometTakeOrderedAndProject (33) + +- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometUnion (29) + :- CometProject (12) + : +- CometBroadcastHashJoin (11) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.item (3) + : +- CometBroadcastExchange (10) + : +- CometFilter (9) + : +- CometScan parquet spark_catalog.default.date_dim (8) + :- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.web_sales (13) + : : +- ReusedExchange (15) + : +- ReusedExchange (18) + +- CometProject (28) + +- CometBroadcastHashJoin (27) + :- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometFilter (22) + : : +- CometScan parquet spark_catalog.default.catalog_sales (21) + : +- ReusedExchange (23) + +- ReusedExchange (26) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#5, i_category#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: [i_item_sk#5, i_category#6] + +(6) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [2]: [i_item_sk#5, i_category#6] +Arguments: [ss_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_item_sk#5, i_category#6] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6], [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : isnotnull(d_date_sk#7) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [d_date_sk#7, d_year#8, d_qoy#9] + +(11) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] +Right output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [ss_sold_date_sk#4], [d_date_sk#7], Inner, BuildRight + +(12) CometProject +Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12], [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#8, d_qoy#9, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#12] + +(13) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#16)] +PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(14) CometFilter +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) + +(15) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#17, i_category#18] + +(16) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Right output [2]: [i_item_sk#17, i_category#18] +Arguments: [ws_item_sk#13], [i_item_sk#17], Inner, BuildRight + +(17) CometProject +Input [6]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_item_sk#17, i_category#18] +Arguments: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18], [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] + +(18) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#19, d_year#20, d_qoy#21] + +(19) CometBroadcastHashJoin +Left output [4]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] +Right output [3]: [d_date_sk#19, d_year#20, d_qoy#21] +Arguments: [ws_sold_date_sk#16], [d_date_sk#19], Inner, BuildRight + +(20) CometProject +Input [7]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18, d_date_sk#19, d_year#20, d_qoy#21] +Arguments: [channel#22, col_name#23, d_year#20, d_qoy#21, i_category#18, ext_sales_price#24], [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#20, d_qoy#21, i_category#18, ws_ext_sales_price#15 AS ext_sales_price#24] + +(21) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#28)] +PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(22) CometFilter +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) + +(23) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#29, i_category#30] + +(24) CometBroadcastHashJoin +Left output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Right output [2]: [i_item_sk#29, i_category#30] +Arguments: [cs_item_sk#26], [i_item_sk#29], Inner, BuildRight + +(25) CometProject +Input [6]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, i_item_sk#29, i_category#30] +Arguments: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30], [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] + +(26) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#31, d_year#32, d_qoy#33] + +(27) CometBroadcastHashJoin +Left output [4]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] +Right output [3]: [d_date_sk#31, d_year#32, d_qoy#33] +Arguments: [cs_sold_date_sk#28], [d_date_sk#31], Inner, BuildRight + +(28) CometProject +Input [7]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30, d_date_sk#31, d_year#32, d_qoy#33] +Arguments: [channel#34, col_name#35, d_year#32, d_qoy#33, i_category#30, ext_sales_price#36], [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#32, d_qoy#33, i_category#30, cs_ext_sales_price#27 AS ext_sales_price#36] + +(29) CometUnion +Child 0 Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Child 1 Input [6]: [channel#22, col_name#23, d_year#20, d_qoy#21, i_category#18, ext_sales_price#24] +Child 2 Input [6]: [channel#34, col_name#35, d_year#32, d_qoy#33, i_category#30, ext_sales_price#36] + +(30) CometHashAggregate +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] + +(31) CometExchange +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#37, sum#38] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#37, sum#38] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] + +(33) CometTakeOrderedAndProject +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[channel#10 ASC NULLS FIRST,col_name#11 ASC NULLS FIRST,d_year#8 ASC NULLS FIRST,d_qoy#9 ASC NULLS FIRST,i_category#6 ASC NULLS FIRST], output=[channel#10,col_name#11,d_year#8,d_qoy#9,i_category#6,sales_cnt#39,sales_amt#40]), [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40], 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#9 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40] + +(34) ColumnarToRow [codegen id : 1] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#39, sales_amt#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..eb3e14d5ec --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q76.native_iceberg_compat/simplified.txt @@ -0,0 +1,36 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] + CometHashAggregate [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt,count,sum,count(1),sum(UnscaledValue(ext_sales_price))] + CometExchange [channel,col_name,d_year,d_qoy,i_category] #1 + CometHashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum,ext_sales_price] + CometUnion [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometProject [ss_store_sk,ss_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_category] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_category] #2 + CometFilter [i_item_sk,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_category] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #3 + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + CometProject [ws_ship_customer_sk,ws_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_category] + CometFilter [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + CometProject [cs_ship_addr_sk,cs_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_category] + CometFilter [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/explain.txt new file mode 100644 index 0000000000..2c6d00edda --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/explain.txt @@ -0,0 +1,277 @@ +== Physical Plan == +TakeOrderedAndProject (53) ++- * HashAggregate (52) + +- Exchange (51) + +- * HashAggregate (50) + +- * Expand (49) + +- Union (48) + :- * ColumnarToRow (22) + : +- CometProject (21) + : +- CometBroadcastHashJoin (20) + : :- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (19) + : +- CometHashAggregate (18) + : +- ReusedExchange (17) + :- * Project (41) + : +- * BroadcastNestedLoopJoin Inner BuildLeft (40) + : :- BroadcastExchange (31) + : : +- * ColumnarToRow (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (23) + : : +- ReusedExchange (24) + : +- * ColumnarToRow (39) + : +- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (32) + : +- ReusedExchange (33) + +- * ColumnarToRow (47) + +- CometProject (46) + +- CometBroadcastHashJoin (45) + :- CometHashAggregate (43) + : +- ReusedExchange (42) + +- ReusedExchange (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5, d_date#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 2000-08-03)) AND (d_date#6 <= 2000-09-02)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3], [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(10) CometFilter +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(11) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(12) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Arguments: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7], [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(14) CometHashAggregate +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] + +(15) CometExchange +Input [3]: [s_store_sk#7, sum#8, sum#9] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [s_store_sk#7, sum#8, sum#9] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] + +(17) ReusedExchange [Reuses operator id: 15] +Output [3]: [s_store_sk#10, sum#11, sum#12] + +(18) CometHashAggregate +Input [3]: [s_store_sk#10, sum#11, sum#12] +Keys [1]: [s_store_sk#10] +Functions [2]: [sum(UnscaledValue(sr_return_amt#13)), sum(UnscaledValue(sr_net_loss#14))] + +(19) CometBroadcastExchange +Input [3]: [s_store_sk#10, returns#15, profit_loss#16] +Arguments: [s_store_sk#10, returns#15, profit_loss#16] + +(20) CometBroadcastHashJoin +Left output [3]: [s_store_sk#7, sales#17, profit#18] +Right output [3]: [s_store_sk#10, returns#15, profit_loss#16] +Arguments: [s_store_sk#7], [s_store_sk#10], LeftOuter, BuildRight + +(21) CometProject +Input [6]: [s_store_sk#7, sales#17, profit#18, s_store_sk#10, returns#15, profit_loss#16] +Arguments: [sales#17, returns#19, profit#20, channel#21, id#22], [sales#17, coalesce(returns#15, 0.00) AS returns#19, (profit#18 - coalesce(profit_loss#16, 0.00)) AS profit#20, store channel AS channel#21, s_store_sk#7 AS id#22] + +(22) ColumnarToRow [codegen id : 1] +Input [5]: [sales#17, returns#19, profit#20, channel#21, id#22] + +(23) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26] +Arguments: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26] + +(24) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#27] + +(25) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26] +Right output [1]: [d_date_sk#27] +Arguments: [cs_sold_date_sk#26], [d_date_sk#27], Inner, BuildRight + +(26) CometProject +Input [5]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26, d_date_sk#27] +Arguments: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25], [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25] + +(27) CometHashAggregate +Input [3]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25] +Keys [1]: [cs_call_center_sk#23] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#24)), partial_sum(UnscaledValue(cs_net_profit#25))] + +(28) CometExchange +Input [3]: [cs_call_center_sk#23, sum#28, sum#29] +Arguments: hashpartitioning(cs_call_center_sk#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [3]: [cs_call_center_sk#23, sum#28, sum#29] +Keys [1]: [cs_call_center_sk#23] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#24)), sum(UnscaledValue(cs_net_profit#25))] + +(30) ColumnarToRow [codegen id : 2] +Input [3]: [cs_call_center_sk#23, sales#30, profit#31] + +(31) BroadcastExchange +Input [3]: [cs_call_center_sk#23, sales#30, profit#31] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(32) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [3]: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34] +Arguments: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34] + +(33) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#35] + +(34) CometBroadcastHashJoin +Left output [3]: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34] +Right output [1]: [d_date_sk#35] +Arguments: [cr_returned_date_sk#34], [d_date_sk#35], Inner, BuildRight + +(35) CometProject +Input [4]: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34, d_date_sk#35] +Arguments: [cr_return_amount#32, cr_net_loss#33], [cr_return_amount#32, cr_net_loss#33] + +(36) CometHashAggregate +Input [2]: [cr_return_amount#32, cr_net_loss#33] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#32)), partial_sum(UnscaledValue(cr_net_loss#33))] + +(37) CometExchange +Input [2]: [sum#36, sum#37] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(38) CometHashAggregate +Input [2]: [sum#36, sum#37] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#32)), sum(UnscaledValue(cr_net_loss#33))] + +(39) ColumnarToRow +Input [2]: [returns#38, profit_loss#39] + +(40) BroadcastNestedLoopJoin [codegen id : 3] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 3] +Output [5]: [sales#30, returns#38, (profit#31 - profit_loss#39) AS profit#40, catalog channel AS channel#41, cs_call_center_sk#23 AS id#42] +Input [5]: [cs_call_center_sk#23, sales#30, profit#31, returns#38, profit_loss#39] + +(42) ReusedExchange [Reuses operator id: 15] +Output [3]: [wp_web_page_sk#43, sum#44, sum#45] + +(43) CometHashAggregate +Input [3]: [wp_web_page_sk#43, sum#44, sum#45] +Keys [1]: [wp_web_page_sk#43] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#46)), sum(UnscaledValue(ws_net_profit#47))] + +(44) ReusedExchange [Reuses operator id: 19] +Output [3]: [wp_web_page_sk#48, returns#49, profit_loss#50] + +(45) CometBroadcastHashJoin +Left output [3]: [wp_web_page_sk#43, sales#51, profit#52] +Right output [3]: [wp_web_page_sk#48, returns#49, profit_loss#50] +Arguments: [wp_web_page_sk#43], [wp_web_page_sk#48], LeftOuter, BuildRight + +(46) CometProject +Input [6]: [wp_web_page_sk#43, sales#51, profit#52, wp_web_page_sk#48, returns#49, profit_loss#50] +Arguments: [sales#51, returns#53, profit#54, channel#55, id#56], [sales#51, coalesce(returns#49, 0.00) AS returns#53, (profit#52 - coalesce(profit_loss#50, 0.00)) AS profit#54, web channel AS channel#55, wp_web_page_sk#43 AS id#56] + +(47) ColumnarToRow [codegen id : 4] +Input [5]: [sales#51, returns#53, profit#54, channel#55, id#56] + +(48) Union + +(49) Expand [codegen id : 5] +Input [5]: [sales#17, returns#19, profit#20, channel#21, id#22] +Arguments: [[sales#17, returns#19, profit#20, channel#21, id#22, 0], [sales#17, returns#19, profit#20, channel#21, null, 1], [sales#17, returns#19, profit#20, null, null, 3]], [sales#17, returns#19, profit#20, channel#57, id#58, spark_grouping_id#59] + +(50) HashAggregate [codegen id : 5] +Input [6]: [sales#17, returns#19, profit#20, channel#57, id#58, spark_grouping_id#59] +Keys [3]: [channel#57, id#58, spark_grouping_id#59] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#19), partial_sum(profit#20)] +Aggregate Attributes [6]: [sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Results [9]: [channel#57, id#58, spark_grouping_id#59, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71] + +(51) Exchange +Input [9]: [channel#57, id#58, spark_grouping_id#59, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71] +Arguments: hashpartitioning(channel#57, id#58, spark_grouping_id#59, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(52) HashAggregate [codegen id : 6] +Input [9]: [channel#57, id#58, spark_grouping_id#59, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71] +Keys [3]: [channel#57, id#58, spark_grouping_id#59] +Functions [3]: [sum(sales#17), sum(returns#19), sum(profit#20)] +Aggregate Attributes [3]: [sum(sales#17)#72, sum(returns#19)#73, sum(profit#20)#74] +Results [5]: [channel#57, id#58, sum(sales#17)#72 AS sales#75, sum(returns#19)#73 AS returns#76, sum(profit#20)#74 AS profit#77] + +(53) TakeOrderedAndProject +Input [5]: [channel#57, id#58, sales#75, returns#76, profit#77] +Arguments: 100, [channel#57 ASC NULLS FIRST, id#58 ASC NULLS FIRST], [channel#57, id#58, sales#75, returns#76, profit#77] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c41db2cfab --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_datafusion/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (6) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [returns,profit,profit_loss,s_store_sk] [sales,returns,profit,channel,id] + CometBroadcastHashJoin [s_store_sk,sales,profit,s_store_sk,returns,profit_loss] + CometHashAggregate [s_store_sk,sales,profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + CometExchange [s_store_sk] #2 + CometHashAggregate [s_store_sk,sum,sum,ss_ext_sales_price,ss_net_profit] + CometProject [ss_ext_sales_price,ss_net_profit,s_store_sk] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk] #4 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + CometBroadcastExchange [s_store_sk,returns,profit_loss] #5 + CometHashAggregate [s_store_sk,returns,profit_loss,sum,sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss))] + ReusedExchange [s_store_sk,sum,sum] #2 + WholeStageCodegen (3) + Project [sales,returns,profit,profit_loss,cs_call_center_sk] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [cs_call_center_sk,sales,profit,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + CometExchange [cs_call_center_sk] #7 + CometHashAggregate [cs_call_center_sk,sum,sum,cs_ext_sales_price,cs_net_profit] + CometProject [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ColumnarToRow + InputAdapter + CometHashAggregate [returns,profit_loss,sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] + CometExchange #8 + CometHashAggregate [sum,sum,cr_return_amount,cr_net_loss] + CometProject [cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cr_return_amount,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometProject [returns,profit,profit_loss,wp_web_page_sk] [sales,returns,profit,channel,id] + CometBroadcastHashJoin [wp_web_page_sk,sales,profit,wp_web_page_sk,returns,profit_loss] + CometHashAggregate [wp_web_page_sk,sales,profit,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + ReusedExchange [wp_web_page_sk,sum,sum] #2 + ReusedExchange [wp_web_page_sk,returns,profit_loss] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..938cdc8ee0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/explain.txt @@ -0,0 +1,471 @@ +== Physical Plan == +TakeOrderedAndProject (84) ++- * HashAggregate (83) + +- Exchange (82) + +- * HashAggregate (81) + +- * Expand (80) + +- Union (79) + :- * ColumnarToRow (31) + : +- CometProject (30) + : +- CometBroadcastHashJoin (29) + : :- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.store (9) + : +- CometBroadcastExchange (28) + : +- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (21) + : : +- CometBroadcastHashJoin (20) + : : :- CometFilter (18) + : : : +- CometScan parquet spark_catalog.default.store_returns (17) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + :- * Project (50) + : +- * BroadcastNestedLoopJoin Inner BuildLeft (49) + : :- BroadcastExchange (40) + : : +- * ColumnarToRow (39) + : : +- CometHashAggregate (38) + : : +- CometExchange (37) + : : +- CometHashAggregate (36) + : : +- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometScan parquet spark_catalog.default.catalog_sales (32) + : : +- ReusedExchange (33) + : +- * ColumnarToRow (48) + : +- CometHashAggregate (47) + : +- CometExchange (46) + : +- CometHashAggregate (45) + : +- CometProject (44) + : +- CometBroadcastHashJoin (43) + : :- CometScan parquet spark_catalog.default.catalog_returns (41) + : +- ReusedExchange (42) + +- * ColumnarToRow (78) + +- CometProject (77) + +- CometBroadcastHashJoin (76) + :- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometFilter (52) + : : : +- CometScan parquet spark_catalog.default.web_sales (51) + : : +- ReusedExchange (53) + : +- CometBroadcastExchange (58) + : +- CometFilter (57) + : +- CometScan parquet spark_catalog.default.web_page (56) + +- CometBroadcastExchange (75) + +- CometHashAggregate (74) + +- CometExchange (73) + +- CometHashAggregate (72) + +- CometProject (71) + +- CometBroadcastHashJoin (70) + :- CometProject (68) + : +- CometBroadcastHashJoin (67) + : :- CometFilter (65) + : : +- CometScan parquet spark_catalog.default.web_returns (64) + : +- ReusedExchange (66) + +- ReusedExchange (69) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 2000-08-03)) AND (d_date#6 <= 2000-09-02)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3], [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] + +(9) CometScan parquet spark_catalog.default.store +Output [1]: [s_store_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(11) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(12) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Arguments: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7], [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(14) CometHashAggregate +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] + +(15) CometExchange +Input [3]: [s_store_sk#7, sum#8, sum#9] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [s_store_sk#7, sum#8, sum#9] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] + +(17) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#13)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(18) CometFilter +Input [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Condition : isnotnull(sr_store_sk#10) + +(19) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(20) CometBroadcastHashJoin +Left output [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Right output [1]: [d_date_sk#14] +Arguments: [sr_returned_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(21) CometProject +Input [5]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13, d_date_sk#14] +Arguments: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12], [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12] + +(22) ReusedExchange [Reuses operator id: 11] +Output [1]: [s_store_sk#15] + +(23) CometBroadcastHashJoin +Left output [3]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12] +Right output [1]: [s_store_sk#15] +Arguments: [sr_store_sk#10], [s_store_sk#15], Inner, BuildRight + +(24) CometProject +Input [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, s_store_sk#15] +Arguments: [sr_return_amt#11, sr_net_loss#12, s_store_sk#15], [sr_return_amt#11, sr_net_loss#12, s_store_sk#15] + +(25) CometHashAggregate +Input [3]: [sr_return_amt#11, sr_net_loss#12, s_store_sk#15] +Keys [1]: [s_store_sk#15] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#11)), partial_sum(UnscaledValue(sr_net_loss#12))] + +(26) CometExchange +Input [3]: [s_store_sk#15, sum#16, sum#17] +Arguments: hashpartitioning(s_store_sk#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(27) CometHashAggregate +Input [3]: [s_store_sk#15, sum#16, sum#17] +Keys [1]: [s_store_sk#15] +Functions [2]: [sum(UnscaledValue(sr_return_amt#11)), sum(UnscaledValue(sr_net_loss#12))] + +(28) CometBroadcastExchange +Input [3]: [s_store_sk#15, returns#18, profit_loss#19] +Arguments: [s_store_sk#15, returns#18, profit_loss#19] + +(29) CometBroadcastHashJoin +Left output [3]: [s_store_sk#7, sales#20, profit#21] +Right output [3]: [s_store_sk#15, returns#18, profit_loss#19] +Arguments: [s_store_sk#7], [s_store_sk#15], LeftOuter, BuildRight + +(30) CometProject +Input [6]: [s_store_sk#7, sales#20, profit#21, s_store_sk#15, returns#18, profit_loss#19] +Arguments: [sales#20, returns#22, profit#23, channel#24, id#25], [sales#20, coalesce(returns#18, 0.00) AS returns#22, (profit#21 - coalesce(profit_loss#19, 0.00)) AS profit#23, store channel AS channel#24, s_store_sk#7 AS id#25] + +(31) ColumnarToRow [codegen id : 1] +Input [5]: [sales#20, returns#22, profit#23, channel#24, id#25] + +(32) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28, cs_sold_date_sk#29] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#29)] +ReadSchema: struct + +(33) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#30] + +(34) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28, cs_sold_date_sk#29] +Right output [1]: [d_date_sk#30] +Arguments: [cs_sold_date_sk#29], [d_date_sk#30], Inner, BuildRight + +(35) CometProject +Input [5]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28, cs_sold_date_sk#29, d_date_sk#30] +Arguments: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28], [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28] + +(36) CometHashAggregate +Input [3]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28] +Keys [1]: [cs_call_center_sk#26] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#27)), partial_sum(UnscaledValue(cs_net_profit#28))] + +(37) CometExchange +Input [3]: [cs_call_center_sk#26, sum#31, sum#32] +Arguments: hashpartitioning(cs_call_center_sk#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(38) CometHashAggregate +Input [3]: [cs_call_center_sk#26, sum#31, sum#32] +Keys [1]: [cs_call_center_sk#26] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#27)), sum(UnscaledValue(cs_net_profit#28))] + +(39) ColumnarToRow [codegen id : 2] +Input [3]: [cs_call_center_sk#26, sales#33, profit#34] + +(40) BroadcastExchange +Input [3]: [cs_call_center_sk#26, sales#33, profit#34] +Arguments: IdentityBroadcastMode, [plan_id=4] + +(41) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_return_amount#35, cr_net_loss#36, cr_returned_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#37)] +ReadSchema: struct + +(42) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#38] + +(43) CometBroadcastHashJoin +Left output [3]: [cr_return_amount#35, cr_net_loss#36, cr_returned_date_sk#37] +Right output [1]: [d_date_sk#38] +Arguments: [cr_returned_date_sk#37], [d_date_sk#38], Inner, BuildRight + +(44) CometProject +Input [4]: [cr_return_amount#35, cr_net_loss#36, cr_returned_date_sk#37, d_date_sk#38] +Arguments: [cr_return_amount#35, cr_net_loss#36], [cr_return_amount#35, cr_net_loss#36] + +(45) CometHashAggregate +Input [2]: [cr_return_amount#35, cr_net_loss#36] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#35)), partial_sum(UnscaledValue(cr_net_loss#36))] + +(46) CometExchange +Input [2]: [sum#39, sum#40] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(47) CometHashAggregate +Input [2]: [sum#39, sum#40] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#35)), sum(UnscaledValue(cr_net_loss#36))] + +(48) ColumnarToRow +Input [2]: [returns#41, profit_loss#42] + +(49) BroadcastNestedLoopJoin [codegen id : 3] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 3] +Output [5]: [sales#33, returns#41, (profit#34 - profit_loss#42) AS profit#43, catalog channel AS channel#44, cs_call_center_sk#26 AS id#45] +Input [5]: [cs_call_center_sk#26, sales#33, profit#34, returns#41, profit_loss#42] + +(51) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#49)] +PushedFilters: [IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(52) CometFilter +Input [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49] +Condition : isnotnull(ws_web_page_sk#46) + +(53) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#50] + +(54) CometBroadcastHashJoin +Left output [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49] +Right output [1]: [d_date_sk#50] +Arguments: [ws_sold_date_sk#49], [d_date_sk#50], Inner, BuildRight + +(55) CometProject +Input [5]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49, d_date_sk#50] +Arguments: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48], [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48] + +(56) CometScan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(57) CometFilter +Input [1]: [wp_web_page_sk#51] +Condition : isnotnull(wp_web_page_sk#51) + +(58) CometBroadcastExchange +Input [1]: [wp_web_page_sk#51] +Arguments: [wp_web_page_sk#51] + +(59) CometBroadcastHashJoin +Left output [3]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48] +Right output [1]: [wp_web_page_sk#51] +Arguments: [ws_web_page_sk#46], [wp_web_page_sk#51], Inner, BuildRight + +(60) CometProject +Input [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51] +Arguments: [ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51], [ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51] + +(61) CometHashAggregate +Input [3]: [ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51] +Keys [1]: [wp_web_page_sk#51] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#47)), partial_sum(UnscaledValue(ws_net_profit#48))] + +(62) CometExchange +Input [3]: [wp_web_page_sk#51, sum#52, sum#53] +Arguments: hashpartitioning(wp_web_page_sk#51, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(63) CometHashAggregate +Input [3]: [wp_web_page_sk#51, sum#52, sum#53] +Keys [1]: [wp_web_page_sk#51] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#47)), sum(UnscaledValue(ws_net_profit#48))] + +(64) CometScan parquet spark_catalog.default.web_returns +Output [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#57)] +PushedFilters: [IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(65) CometFilter +Input [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57] +Condition : isnotnull(wr_web_page_sk#54) + +(66) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#58] + +(67) CometBroadcastHashJoin +Left output [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57] +Right output [1]: [d_date_sk#58] +Arguments: [wr_returned_date_sk#57], [d_date_sk#58], Inner, BuildRight + +(68) CometProject +Input [5]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57, d_date_sk#58] +Arguments: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56], [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56] + +(69) ReusedExchange [Reuses operator id: 58] +Output [1]: [wp_web_page_sk#59] + +(70) CometBroadcastHashJoin +Left output [3]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56] +Right output [1]: [wp_web_page_sk#59] +Arguments: [wr_web_page_sk#54], [wp_web_page_sk#59], Inner, BuildRight + +(71) CometProject +Input [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59] +Arguments: [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59], [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59] + +(72) CometHashAggregate +Input [3]: [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59] +Keys [1]: [wp_web_page_sk#59] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))] + +(73) CometExchange +Input [3]: [wp_web_page_sk#59, sum#60, sum#61] +Arguments: hashpartitioning(wp_web_page_sk#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(74) CometHashAggregate +Input [3]: [wp_web_page_sk#59, sum#60, sum#61] +Keys [1]: [wp_web_page_sk#59] +Functions [2]: [sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))] + +(75) CometBroadcastExchange +Input [3]: [wp_web_page_sk#59, returns#62, profit_loss#63] +Arguments: [wp_web_page_sk#59, returns#62, profit_loss#63] + +(76) CometBroadcastHashJoin +Left output [3]: [wp_web_page_sk#51, sales#64, profit#65] +Right output [3]: [wp_web_page_sk#59, returns#62, profit_loss#63] +Arguments: [wp_web_page_sk#51], [wp_web_page_sk#59], LeftOuter, BuildRight + +(77) CometProject +Input [6]: [wp_web_page_sk#51, sales#64, profit#65, wp_web_page_sk#59, returns#62, profit_loss#63] +Arguments: [sales#64, returns#66, profit#67, channel#68, id#69], [sales#64, coalesce(returns#62, 0.00) AS returns#66, (profit#65 - coalesce(profit_loss#63, 0.00)) AS profit#67, web channel AS channel#68, wp_web_page_sk#51 AS id#69] + +(78) ColumnarToRow [codegen id : 4] +Input [5]: [sales#64, returns#66, profit#67, channel#68, id#69] + +(79) Union + +(80) Expand [codegen id : 5] +Input [5]: [sales#20, returns#22, profit#23, channel#24, id#25] +Arguments: [[sales#20, returns#22, profit#23, channel#24, id#25, 0], [sales#20, returns#22, profit#23, channel#24, null, 1], [sales#20, returns#22, profit#23, null, null, 3]], [sales#20, returns#22, profit#23, channel#70, id#71, spark_grouping_id#72] + +(81) HashAggregate [codegen id : 5] +Input [6]: [sales#20, returns#22, profit#23, channel#70, id#71, spark_grouping_id#72] +Keys [3]: [channel#70, id#71, spark_grouping_id#72] +Functions [3]: [partial_sum(sales#20), partial_sum(returns#22), partial_sum(profit#23)] +Aggregate Attributes [6]: [sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78] +Results [9]: [channel#70, id#71, spark_grouping_id#72, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84] + +(82) Exchange +Input [9]: [channel#70, id#71, spark_grouping_id#72, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84] +Arguments: hashpartitioning(channel#70, id#71, spark_grouping_id#72, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(83) HashAggregate [codegen id : 6] +Input [9]: [channel#70, id#71, spark_grouping_id#72, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84] +Keys [3]: [channel#70, id#71, spark_grouping_id#72] +Functions [3]: [sum(sales#20), sum(returns#22), sum(profit#23)] +Aggregate Attributes [3]: [sum(sales#20)#85, sum(returns#22)#86, sum(profit#23)#87] +Results [5]: [channel#70, id#71, sum(sales#20)#85 AS sales#88, sum(returns#22)#86 AS returns#89, sum(profit#23)#87 AS profit#90] + +(84) TakeOrderedAndProject +Input [5]: [channel#70, id#71, sales#88, returns#89, profit#90] +Arguments: 100, [channel#70 ASC NULLS FIRST, id#71 ASC NULLS FIRST], [channel#70, id#71, sales#88, returns#89, profit#90] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2a2293eef4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q77.native_iceberg_compat/simplified.txt @@ -0,0 +1,97 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (6) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [returns,profit,profit_loss,s_store_sk] [sales,returns,profit,channel,id] + CometBroadcastHashJoin [s_store_sk,sales,profit,s_store_sk,returns,profit_loss] + CometHashAggregate [s_store_sk,sales,profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + CometExchange [s_store_sk] #2 + CometHashAggregate [s_store_sk,sum,sum,ss_ext_sales_price,ss_net_profit] + CometProject [ss_ext_sales_price,ss_net_profit,s_store_sk] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk] #4 + CometFilter [s_store_sk] + CometScan parquet spark_catalog.default.store [s_store_sk] + CometBroadcastExchange [s_store_sk,returns,profit_loss] #5 + CometHashAggregate [s_store_sk,returns,profit_loss,sum,sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss))] + CometExchange [s_store_sk] #6 + CometHashAggregate [s_store_sk,sum,sum,sr_return_amt,sr_net_loss] + CometProject [sr_return_amt,sr_net_loss,s_store_sk] + CometBroadcastHashJoin [sr_store_sk,sr_return_amt,sr_net_loss,s_store_sk] + CometProject [sr_store_sk,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk,d_date_sk] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [s_store_sk] #4 + WholeStageCodegen (3) + Project [sales,returns,profit,profit_loss,cs_call_center_sk] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [cs_call_center_sk,sales,profit,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + CometExchange [cs_call_center_sk] #8 + CometHashAggregate [cs_call_center_sk,sum,sum,cs_ext_sales_price,cs_net_profit] + CometProject [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ColumnarToRow + InputAdapter + CometHashAggregate [returns,profit_loss,sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] + CometExchange #9 + CometHashAggregate [sum,sum,cr_return_amount,cr_net_loss] + CometProject [cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cr_return_amount,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometProject [returns,profit,profit_loss,wp_web_page_sk] [sales,returns,profit,channel,id] + CometBroadcastHashJoin [wp_web_page_sk,sales,profit,wp_web_page_sk,returns,profit_loss] + CometHashAggregate [wp_web_page_sk,sales,profit,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + CometExchange [wp_web_page_sk] #10 + CometHashAggregate [wp_web_page_sk,sum,sum,ws_ext_sales_price,ws_net_profit] + CometProject [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + CometBroadcastHashJoin [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + CometProject [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + CometBroadcastHashJoin [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,d_date_sk] + CometFilter [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [wp_web_page_sk] #11 + CometFilter [wp_web_page_sk] + CometScan parquet spark_catalog.default.web_page [wp_web_page_sk] + CometBroadcastExchange [wp_web_page_sk,returns,profit_loss] #12 + CometHashAggregate [wp_web_page_sk,returns,profit_loss,sum,sum,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss))] + CometExchange [wp_web_page_sk] #13 + CometHashAggregate [wp_web_page_sk,sum,sum,wr_return_amt,wr_net_loss] + CometProject [wr_return_amt,wr_net_loss,wp_web_page_sk] + CometBroadcastHashJoin [wr_web_page_sk,wr_return_amt,wr_net_loss,wp_web_page_sk] + CometProject [wr_web_page_sk,wr_return_amt,wr_net_loss] + CometBroadcastHashJoin [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk,d_date_sk] + CometFilter [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [wp_web_page_sk] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/explain.txt new file mode 100644 index 0000000000..ae6446061d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/explain.txt @@ -0,0 +1,250 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * ColumnarToRow (46) + +- CometSortMergeJoin (45) + :- CometProject (27) + : +- CometSortMergeJoin (26) + : :- CometSort (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometFilter (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : +- CometSort (25) + : +- CometFilter (24) + : +- CometHashAggregate (23) + : +- ReusedExchange (22) + +- CometSort (44) + +- CometFilter (43) + +- CometHashAggregate (42) + +- CometExchange (41) + +- CometHashAggregate (40) + +- CometProject (39) + +- CometBroadcastHashJoin (38) + :- CometProject (36) + : +- CometFilter (35) + : +- CometSortMergeJoin (34) + : :- CometSort (31) + : : +- CometExchange (30) + : : +- CometFilter (29) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (28) + : +- CometSort (33) + : +- ReusedExchange (32) + +- ReusedExchange (37) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(6) CometFilter +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) CometProject +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_item_sk#8, sr_ticket_number#9] + +(8) CometExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_ticket_number#3, ss_item_sk#1], [sr_ticket_number#9, sr_item_sk#8], LeftOuter + +(11) CometFilter +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(12) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(14) CometFilter +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(15) CometBroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(16) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#11, d_year#12] +Arguments: [ss_sold_date_sk#7], [d_date_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] + +(18) CometHashAggregate +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometHashAggregate +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] + +(21) CometSort +Input [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19], [ss_sold_year#16 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST] + +(22) ReusedExchange [Reuses operator id: 19] +Output [6]: [d_year#20, ws_item_sk#21, ws_bill_customer_sk#22, sum#23, sum#24, sum#25] + +(23) CometHashAggregate +Input [6]: [d_year#20, ws_item_sk#21, ws_bill_customer_sk#22, sum#23, sum#24, sum#25] +Keys [3]: [d_year#20, ws_item_sk#21, ws_bill_customer_sk#22] +Functions [3]: [sum(ws_quantity#26), sum(UnscaledValue(ws_wholesale_cost#27)), sum(UnscaledValue(ws_sales_price#28))] + +(24) CometFilter +Input [6]: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Condition : (coalesce(ws_qty#31, 0) > 0) + +(25) CometSort +Input [6]: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Arguments: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33], [ws_sold_year#29 ASC NULLS FIRST, ws_item_sk#21 ASC NULLS FIRST, ws_customer_sk#30 ASC NULLS FIRST] + +(26) CometSortMergeJoin +Left output [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Right output [6]: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30], Inner + +(27) CometProject +Input [12]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33], [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33] + +(28) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] + +(29) CometFilter +Input [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Condition : (isnotnull(cs_item_sk#35) AND isnotnull(cs_bill_customer_sk#34)) + +(30) CometExchange +Input [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Arguments: hashpartitioning(cs_order_number#36, cs_item_sk#35, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(31) CometSort +Input [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40], [cs_order_number#36 ASC NULLS FIRST, cs_item_sk#35 ASC NULLS FIRST] + +(32) ReusedExchange [Reuses operator id: 8] +Output [2]: [cr_item_sk#41, cr_order_number#42] + +(33) CometSort +Input [2]: [cr_item_sk#41, cr_order_number#42] +Arguments: [cr_item_sk#41, cr_order_number#42], [cr_order_number#42 ASC NULLS FIRST, cr_item_sk#41 ASC NULLS FIRST] + +(34) CometSortMergeJoin +Left output [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Right output [2]: [cr_item_sk#41, cr_order_number#42] +Arguments: [cs_order_number#36, cs_item_sk#35], [cr_order_number#42, cr_item_sk#41], LeftOuter + +(35) CometFilter +Input [9]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42] +Condition : isnull(cr_order_number#42) + +(36) CometProject +Input [9]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40], [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] + +(37) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#43, d_year#44] + +(38) CometBroadcastHashJoin +Left output [6]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Right output [2]: [d_date_sk#43, d_year#44] +Arguments: [cs_sold_date_sk#40], [d_date_sk#43], Inner, BuildRight + +(39) CometProject +Input [8]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40, d_date_sk#43, d_year#44] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, d_year#44], [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, d_year#44] + +(40) CometHashAggregate +Input [6]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, d_year#44] +Keys [3]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34] +Functions [3]: [partial_sum(cs_quantity#37), partial_sum(UnscaledValue(cs_wholesale_cost#38)), partial_sum(UnscaledValue(cs_sales_price#39))] + +(41) CometExchange +Input [6]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34, sum#45, sum#46, sum#47] +Arguments: hashpartitioning(d_year#44, cs_item_sk#35, cs_bill_customer_sk#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(42) CometHashAggregate +Input [6]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34, sum#45, sum#46, sum#47] +Keys [3]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34] +Functions [3]: [sum(cs_quantity#37), sum(UnscaledValue(cs_wholesale_cost#38)), sum(UnscaledValue(cs_sales_price#39))] + +(43) CometFilter +Input [6]: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] +Condition : (coalesce(cs_qty#50, 0) > 0) + +(44) CometSort +Input [6]: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] +Arguments: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52], [cs_sold_year#48 ASC NULLS FIRST, cs_item_sk#35 ASC NULLS FIRST, cs_customer_sk#49 ASC NULLS FIRST] + +(45) CometSortMergeJoin +Left output [9]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33] +Right output [6]: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49], Inner + +(46) ColumnarToRow [codegen id : 1] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33, cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] + +(47) Project [codegen id : 1] +Output [12]: [round((cast(ss_qty#17 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(coalesce((ws_qty#31 + cs_qty#50), 1) as double)))), 2) AS ratio#53, ss_qty#17 AS store_qty#54, ss_wc#18 AS store_wholesale_cost#55, ss_sp#19 AS store_sales_price#56, (coalesce(ws_qty#31, 0) + coalesce(cs_qty#50, 0)) AS other_chan_qty#57, (coalesce(ws_wc#32, 0.00) + coalesce(cs_wc#51, 0.00)) AS other_chan_wholesale_cost#58, (coalesce(ws_sp#33, 0.00) + coalesce(cs_sp#52, 0.00)) AS other_chan_sales_price#59, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, cs_qty#50] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33, cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] + +(48) TakeOrderedAndProject +Input [12]: [ratio#53, store_qty#54, store_wholesale_cost#55, store_sales_price#56, other_chan_qty#57, other_chan_wholesale_cost#58, other_chan_sales_price#59, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, cs_qty#50] +Arguments: 100, [ratio#53 ASC NULLS FIRST, ss_qty#17 DESC NULLS LAST, ss_wc#18 DESC NULLS LAST, ss_sp#19 DESC NULLS LAST, other_chan_qty#57 ASC NULLS FIRST, other_chan_wholesale_cost#58 ASC NULLS FIRST, other_chan_sales_price#59 ASC NULLS FIRST, round((cast(ss_qty#17 as double) / cast(coalesce((ws_qty#31 + cs_qty#50), 1) as double)), 2) ASC NULLS FIRST], [ratio#53, store_qty#54, store_wholesale_cost#55, store_sales_price#56, other_chan_qty#57, other_chan_wholesale_cost#58, other_chan_sales_price#59] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/simplified.txt new file mode 100644 index 0000000000..060ad787bd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (1) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] + ColumnarToRow + InputAdapter + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp,cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometSort [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp] + CometHashAggregate [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,d_year,sum,sum,sum,sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price))] + CometExchange [d_year,ss_item_sk,ss_customer_sk] #1 + CometHashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum,ss_quantity,ss_wholesale_cost,ss_sales_price] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometExchange [ss_ticket_number,ss_item_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #3 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #4 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometFilter [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometHashAggregate [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp,d_year,ws_bill_customer_sk,sum,sum,sum,sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price))] + ReusedExchange [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] #1 + CometSort [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometFilter [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometHashAggregate [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp,d_year,cs_bill_customer_sk,sum,sum,sum,sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price))] + CometExchange [d_year,cs_item_sk,cs_bill_customer_sk] #5 + CometHashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum,cs_quantity,cs_wholesale_cost,cs_sales_price] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSortMergeJoin [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSort [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #6 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number] + ReusedExchange [cr_item_sk,cr_order_number] #3 + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..c8c3085d9d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/explain.txt @@ -0,0 +1,373 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * Project (66) + +- * ColumnarToRow (65) + +- CometSortMergeJoin (64) + :- CometProject (43) + : +- CometSortMergeJoin (42) + : :- CometSort (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometFilter (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometScan parquet spark_catalog.default.store_returns (5) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.date_dim (13) + : +- CometSort (41) + : +- CometFilter (40) + : +- CometHashAggregate (39) + : +- CometExchange (38) + : +- CometHashAggregate (37) + : +- CometProject (36) + : +- CometBroadcastHashJoin (35) + : :- CometProject (33) + : : +- CometFilter (32) + : : +- CometSortMergeJoin (31) + : : :- CometSort (25) + : : : +- CometExchange (24) + : : : +- CometFilter (23) + : : : +- CometScan parquet spark_catalog.default.web_sales (22) + : : +- CometSort (30) + : : +- CometExchange (29) + : : +- CometProject (28) + : : +- CometFilter (27) + : : +- CometScan parquet spark_catalog.default.web_returns (26) + : +- ReusedExchange (34) + +- CometSort (63) + +- CometFilter (62) + +- CometHashAggregate (61) + +- CometExchange (60) + +- CometHashAggregate (59) + +- CometProject (58) + +- CometBroadcastHashJoin (57) + :- CometProject (55) + : +- CometFilter (54) + : +- CometSortMergeJoin (53) + : :- CometSort (47) + : : +- CometExchange (46) + : : +- CometFilter (45) + : : +- CometScan parquet spark_catalog.default.catalog_sales (44) + : +- CometSort (52) + : +- CometExchange (51) + : +- CometProject (50) + : +- CometFilter (49) + : +- CometScan parquet spark_catalog.default.catalog_returns (48) + +- ReusedExchange (56) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(5) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) CometProject +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_item_sk#8, sr_ticket_number#9] + +(8) CometExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_ticket_number#3, ss_item_sk#1], [sr_ticket_number#9, sr_item_sk#8], LeftOuter + +(11) CometFilter +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(12) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(15) CometBroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(16) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#11, d_year#12] +Arguments: [ss_sold_date_sk#7], [d_date_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] + +(18) CometHashAggregate +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometHashAggregate +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] + +(21) CometSort +Input [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19], [ss_sold_year#16 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST] + +(22) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#26)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(23) CometFilter +Input [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Condition : (isnotnull(ws_item_sk#20) AND isnotnull(ws_bill_customer_sk#21)) + +(24) CometExchange +Input [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Arguments: hashpartitioning(ws_order_number#22, ws_item_sk#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(25) CometSort +Input [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Arguments: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26], [ws_order_number#22 ASC NULLS FIRST, ws_item_sk#20 ASC NULLS FIRST] + +(26) CometScan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#27, wr_order_number#28, wr_returned_date_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(27) CometFilter +Input [3]: [wr_item_sk#27, wr_order_number#28, wr_returned_date_sk#29] +Condition : (isnotnull(wr_order_number#28) AND isnotnull(wr_item_sk#27)) + +(28) CometProject +Input [3]: [wr_item_sk#27, wr_order_number#28, wr_returned_date_sk#29] +Arguments: [wr_item_sk#27, wr_order_number#28], [wr_item_sk#27, wr_order_number#28] + +(29) CometExchange +Input [2]: [wr_item_sk#27, wr_order_number#28] +Arguments: hashpartitioning(wr_order_number#28, wr_item_sk#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(30) CometSort +Input [2]: [wr_item_sk#27, wr_order_number#28] +Arguments: [wr_item_sk#27, wr_order_number#28], [wr_order_number#28 ASC NULLS FIRST, wr_item_sk#27 ASC NULLS FIRST] + +(31) CometSortMergeJoin +Left output [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Right output [2]: [wr_item_sk#27, wr_order_number#28] +Arguments: [ws_order_number#22, ws_item_sk#20], [wr_order_number#28, wr_item_sk#27], LeftOuter + +(32) CometFilter +Input [9]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26, wr_item_sk#27, wr_order_number#28] +Condition : isnull(wr_order_number#28) + +(33) CometProject +Input [9]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26, wr_item_sk#27, wr_order_number#28] +Arguments: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26], [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] + +(34) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#30, d_year#31] + +(35) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Right output [2]: [d_date_sk#30, d_year#31] +Arguments: [ws_sold_date_sk#26], [d_date_sk#30], Inner, BuildRight + +(36) CometProject +Input [8]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26, d_date_sk#30, d_year#31] +Arguments: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, d_year#31], [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, d_year#31] + +(37) CometHashAggregate +Input [6]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, d_year#31] +Keys [3]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21] +Functions [3]: [partial_sum(ws_quantity#23), partial_sum(UnscaledValue(ws_wholesale_cost#24)), partial_sum(UnscaledValue(ws_sales_price#25))] + +(38) CometExchange +Input [6]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(d_year#31, ws_item_sk#20, ws_bill_customer_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(39) CometHashAggregate +Input [6]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21, sum#32, sum#33, sum#34] +Keys [3]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21] +Functions [3]: [sum(ws_quantity#23), sum(UnscaledValue(ws_wholesale_cost#24)), sum(UnscaledValue(ws_sales_price#25))] + +(40) CometFilter +Input [6]: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Condition : (coalesce(ws_qty#37, 0) > 0) + +(41) CometSort +Input [6]: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Arguments: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39], [ws_sold_year#35 ASC NULLS FIRST, ws_item_sk#20 ASC NULLS FIRST, ws_customer_sk#36 ASC NULLS FIRST] + +(42) CometSortMergeJoin +Left output [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Right output [6]: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36], Inner + +(43) CometProject +Input [12]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39], [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39] + +(44) CometScan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#46)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(45) CometFilter +Input [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Condition : (isnotnull(cs_item_sk#41) AND isnotnull(cs_bill_customer_sk#40)) + +(46) CometExchange +Input [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Arguments: hashpartitioning(cs_order_number#42, cs_item_sk#41, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(47) CometSort +Input [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Arguments: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46], [cs_order_number#42 ASC NULLS FIRST, cs_item_sk#41 ASC NULLS FIRST] + +(48) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#47, cr_order_number#48, cr_returned_date_sk#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(49) CometFilter +Input [3]: [cr_item_sk#47, cr_order_number#48, cr_returned_date_sk#49] +Condition : (isnotnull(cr_order_number#48) AND isnotnull(cr_item_sk#47)) + +(50) CometProject +Input [3]: [cr_item_sk#47, cr_order_number#48, cr_returned_date_sk#49] +Arguments: [cr_item_sk#47, cr_order_number#48], [cr_item_sk#47, cr_order_number#48] + +(51) CometExchange +Input [2]: [cr_item_sk#47, cr_order_number#48] +Arguments: hashpartitioning(cr_order_number#48, cr_item_sk#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(52) CometSort +Input [2]: [cr_item_sk#47, cr_order_number#48] +Arguments: [cr_item_sk#47, cr_order_number#48], [cr_order_number#48 ASC NULLS FIRST, cr_item_sk#47 ASC NULLS FIRST] + +(53) CometSortMergeJoin +Left output [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Right output [2]: [cr_item_sk#47, cr_order_number#48] +Arguments: [cs_order_number#42, cs_item_sk#41], [cr_order_number#48, cr_item_sk#47], LeftOuter + +(54) CometFilter +Input [9]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46, cr_item_sk#47, cr_order_number#48] +Condition : isnull(cr_order_number#48) + +(55) CometProject +Input [9]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46, cr_item_sk#47, cr_order_number#48] +Arguments: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46], [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] + +(56) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#50, d_year#51] + +(57) CometBroadcastHashJoin +Left output [6]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Right output [2]: [d_date_sk#50, d_year#51] +Arguments: [cs_sold_date_sk#46], [d_date_sk#50], Inner, BuildRight + +(58) CometProject +Input [8]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46, d_date_sk#50, d_year#51] +Arguments: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, d_year#51], [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, d_year#51] + +(59) CometHashAggregate +Input [6]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, d_year#51] +Keys [3]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40] +Functions [3]: [partial_sum(cs_quantity#43), partial_sum(UnscaledValue(cs_wholesale_cost#44)), partial_sum(UnscaledValue(cs_sales_price#45))] + +(60) CometExchange +Input [6]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40, sum#52, sum#53, sum#54] +Arguments: hashpartitioning(d_year#51, cs_item_sk#41, cs_bill_customer_sk#40, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(61) CometHashAggregate +Input [6]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40, sum#52, sum#53, sum#54] +Keys [3]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40] +Functions [3]: [sum(cs_quantity#43), sum(UnscaledValue(cs_wholesale_cost#44)), sum(UnscaledValue(cs_sales_price#45))] + +(62) CometFilter +Input [6]: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] +Condition : (coalesce(cs_qty#57, 0) > 0) + +(63) CometSort +Input [6]: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] +Arguments: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59], [cs_sold_year#55 ASC NULLS FIRST, cs_item_sk#41 ASC NULLS FIRST, cs_customer_sk#56 ASC NULLS FIRST] + +(64) CometSortMergeJoin +Left output [9]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39] +Right output [6]: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56], Inner + +(65) ColumnarToRow [codegen id : 1] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39, cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] + +(66) Project [codegen id : 1] +Output [12]: [round((cast(ss_qty#17 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(coalesce((ws_qty#37 + cs_qty#57), 1) as double)))), 2) AS ratio#60, ss_qty#17 AS store_qty#61, ss_wc#18 AS store_wholesale_cost#62, ss_sp#19 AS store_sales_price#63, (coalesce(ws_qty#37, 0) + coalesce(cs_qty#57, 0)) AS other_chan_qty#64, (coalesce(ws_wc#38, 0.00) + coalesce(cs_wc#58, 0.00)) AS other_chan_wholesale_cost#65, (coalesce(ws_sp#39, 0.00) + coalesce(cs_sp#59, 0.00)) AS other_chan_sales_price#66, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, cs_qty#57] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39, cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] + +(67) TakeOrderedAndProject +Input [12]: [ratio#60, store_qty#61, store_wholesale_cost#62, store_sales_price#63, other_chan_qty#64, other_chan_wholesale_cost#65, other_chan_sales_price#66, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, cs_qty#57] +Arguments: 100, [ratio#60 ASC NULLS FIRST, ss_qty#17 DESC NULLS LAST, ss_wc#18 DESC NULLS LAST, ss_sp#19 DESC NULLS LAST, other_chan_qty#64 ASC NULLS FIRST, other_chan_wholesale_cost#65 ASC NULLS FIRST, other_chan_sales_price#66 ASC NULLS FIRST, round((cast(ss_qty#17 as double) / cast(coalesce((ws_qty#37 + cs_qty#57), 1) as double)), 2) ASC NULLS FIRST], [ratio#60, store_qty#61, store_wholesale_cost#62, store_sales_price#63, other_chan_qty#64, other_chan_wholesale_cost#65, other_chan_sales_price#66] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..13d62e25ea --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q78.native_iceberg_compat/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (1) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] + ColumnarToRow + InputAdapter + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp,cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometSort [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp] + CometHashAggregate [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,d_year,sum,sum,sum,sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price))] + CometExchange [d_year,ss_item_sk,ss_customer_sk] #1 + CometHashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum,ss_quantity,ss_wholesale_cost,ss_sales_price] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometExchange [ss_ticket_number,ss_item_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #3 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #4 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometSort [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometFilter [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometHashAggregate [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp,d_year,ws_bill_customer_sk,sum,sum,sum,sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price))] + CometExchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + CometHashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum,ws_quantity,ws_wholesale_cost,ws_sales_price] + CometProject [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk,wr_item_sk,wr_order_number] + CometSortMergeJoin [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk,wr_item_sk,wr_order_number] + CometSort [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometExchange [ws_order_number,ws_item_sk] #6 + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometSort [wr_item_sk,wr_order_number] + CometExchange [wr_order_number,wr_item_sk] #7 + CometProject [wr_item_sk,wr_order_number] + CometFilter [wr_item_sk,wr_order_number,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_returned_date_sk] + ReusedExchange [d_date_sk,d_year] #4 + CometSort [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometFilter [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometHashAggregate [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp,d_year,cs_bill_customer_sk,sum,sum,sum,sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price))] + CometExchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + CometHashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum,cs_quantity,cs_wholesale_cost,cs_sales_price] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSortMergeJoin [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSort [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #9 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_order_number,cr_item_sk] #10 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/explain.txt new file mode 100644 index 0000000000..f22d1d9714 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/explain.txt @@ -0,0 +1,158 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometProject (28) + +- CometBroadcastHashJoin (27) + :- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- CometBroadcastExchange (26) + +- CometFilter (25) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (24) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9, d_year#10, d_dow#11] + +(4) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(5) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Arguments: [s_store_sk#12, s_number_employees#13, s_city#14] + +(10) CometFilter +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Condition : (((isnotnull(s_number_employees#13) AND (s_number_employees#13 >= 200)) AND (s_number_employees#13 <= 295)) AND isnotnull(s_store_sk#12)) + +(11) CometProject +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Arguments: [s_store_sk#12, s_city#14], [s_store_sk#12, s_city#14] + +(12) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_city#14] +Arguments: [s_store_sk#12, s_city#14] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [2]: [s_store_sk#12, s_city#14] +Arguments: [ss_store_sk#4], [s_store_sk#12], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12, s_city#14] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(16) CometFilter +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 6) OR (hd_vehicle_count#17 > 2)) AND isnotnull(hd_demo_sk#15)) + +(17) CometProject +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15], [hd_demo_sk#15] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: [hd_demo_sk#15] + +(19) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Right output [1]: [hd_demo_sk#15] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#15], Inner, BuildRight + +(20) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14, hd_demo_sk#15] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(21) CometHashAggregate +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] + +(22) CometExchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#18, sum#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#18, sum#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(25) CometFilter +Input [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Condition : isnotnull(c_customer_sk#20) + +(26) CometBroadcastExchange +Input [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(27) CometBroadcastHashJoin +Left output [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#23, profit#24] +Right output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [ss_customer_sk#1], [c_customer_sk#20], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#23, profit#24, c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24, s_city#14], [c_last_name#22, c_first_name#21, substr(s_city#14, 1, 30) AS substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24, s_city#14] + +(29) CometTakeOrderedAndProject +Input [7]: [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24, s_city#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#22 ASC NULLS FIRST,c_first_name#21 ASC NULLS FIRST,substr(s_city#14, 1, 30) ASC NULLS FIRST,profit#24 ASC NULLS FIRST], output=[c_last_name#22,c_first_name#21,substr(s_city, 1, 30)#25,ss_ticket_number#5,amt#23,profit#24]), [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24], 100, [c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, substr(s_city#14, 1, 30) ASC NULLS FIRST, profit#24 ASC NULLS FIRST], [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24] + +(30) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8c5b3a2e53 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,substr(s_city, 1, 30),ss_ticket_number,amt,profit,s_city] + CometProject [c_last_name,c_first_name,substr(s_city, 1, 30),ss_ticket_number,amt,profit,s_city] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,s_city,amt,profit,c_customer_sk,c_first_name,c_last_name] + CometHashAggregate [ss_ticket_number,ss_customer_sk,s_city,amt,profit,ss_addr_sk,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + CometExchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + CometHashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum,ss_coupon_amt,ss_net_profit] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_store_sk,s_city] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dow] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dow] + CometBroadcastExchange [s_store_sk,s_city] #3 + CometProject [s_store_sk,s_city] + CometFilter [s_store_sk,s_number_employees,s_city] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_number_employees,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #5 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9e01b2a9fc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/explain.txt @@ -0,0 +1,174 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometTakeOrderedAndProject (29) + +- CometProject (28) + +- CometBroadcastHashJoin (27) + :- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.store (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.household_demographics (15) + +- CometBroadcastExchange (26) + +- CometFilter (25) + +- CometScan parquet spark_catalog.default.customer (24) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(5) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(9) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Condition : (((isnotnull(s_number_employees#13) AND (s_number_employees#13 >= 200)) AND (s_number_employees#13 <= 295)) AND isnotnull(s_store_sk#12)) + +(11) CometProject +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Arguments: [s_store_sk#12, s_city#14], [s_store_sk#12, s_city#14] + +(12) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_city#14] +Arguments: [s_store_sk#12, s_city#14] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [2]: [s_store_sk#12, s_city#14] +Arguments: [ss_store_sk#4], [s_store_sk#12], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12, s_city#14] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(15) CometScan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 6) OR (hd_vehicle_count#17 > 2)) AND isnotnull(hd_demo_sk#15)) + +(17) CometProject +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15], [hd_demo_sk#15] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: [hd_demo_sk#15] + +(19) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Right output [1]: [hd_demo_sk#15] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#15], Inner, BuildRight + +(20) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14, hd_demo_sk#15] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(21) CometHashAggregate +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] + +(22) CometExchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#18, sum#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#18, sum#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] + +(24) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(25) CometFilter +Input [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Condition : isnotnull(c_customer_sk#20) + +(26) CometBroadcastExchange +Input [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(27) CometBroadcastHashJoin +Left output [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#23, profit#24] +Right output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [ss_customer_sk#1], [c_customer_sk#20], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#23, profit#24, c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24, s_city#14], [c_last_name#22, c_first_name#21, substr(s_city#14, 1, 30) AS substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24, s_city#14] + +(29) CometTakeOrderedAndProject +Input [7]: [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24, s_city#14] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_last_name#22 ASC NULLS FIRST,c_first_name#21 ASC NULLS FIRST,substr(s_city#14, 1, 30) ASC NULLS FIRST,profit#24 ASC NULLS FIRST], output=[c_last_name#22,c_first_name#21,substr(s_city, 1, 30)#25,ss_ticket_number#5,amt#23,profit#24]), [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24], 100, [c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, substr(s_city#14, 1, 30) ASC NULLS FIRST, profit#24 ASC NULLS FIRST], [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24] + +(30) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#22, c_first_name#21, substr(s_city, 1, 30)#25, ss_ticket_number#5, amt#23, profit#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..60880d54af --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q79.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_last_name,c_first_name,substr(s_city, 1, 30),ss_ticket_number,amt,profit,s_city] + CometProject [c_last_name,c_first_name,substr(s_city, 1, 30),ss_ticket_number,amt,profit,s_city] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,s_city,amt,profit,c_customer_sk,c_first_name,c_last_name] + CometHashAggregate [ss_ticket_number,ss_customer_sk,s_city,amt,profit,ss_addr_sk,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] + CometExchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + CometHashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum,ss_coupon_amt,ss_net_profit] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_store_sk,s_city] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dow] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dow] + CometBroadcastExchange [s_store_sk,s_city] #3 + CometProject [s_store_sk,s_city] + CometFilter [s_store_sk,s_number_employees,s_city] + CometScan parquet spark_catalog.default.store [s_store_sk,s_number_employees,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #5 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/explain.txt new file mode 100644 index 0000000000..c65d269ef2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/explain.txt @@ -0,0 +1,223 @@ +== Physical Plan == +* ColumnarToRow (42) ++- CometTakeOrderedAndProject (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + +- CometBroadcastExchange (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometBroadcastHashJoin (31) + :- CometProject (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (14) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometFilter (28) + +- CometHashAggregate (27) + +- CometExchange (26) + +- CometHashAggregate (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (17) + +- CometBroadcastExchange (22) + +- CometProject (21) + +- CometFilter (20) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (19) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [s_store_sk#7, s_store_name#8, s_zip#9] + +(10) CometFilter +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Condition : (isnotnull(s_store_sk#7) AND isnotnull(s_zip#9)) + +(11) CometBroadcastExchange +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [s_store_sk#7, s_store_name#8, s_zip#9] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_store_sk#1, ss_net_profit#2] +Right output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [ss_net_profit#2, s_store_name#8, s_zip#9], [ss_net_profit#2, s_store_name#8, s_zip#9] + +(14) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [1]: [ca_zip#10] +Arguments: [ca_zip#10] + +(15) CometFilter +Input [1]: [ca_zip#10] +Condition : (substr(ca_zip#10, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#10, 1, 5))) + +(16) CometProject +Input [1]: [ca_zip#10] +Arguments: [ca_zip#11], [substr(ca_zip#10, 1, 5) AS ca_zip#11] + +(17) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#12, ca_zip#13] +Arguments: [ca_address_sk#12, ca_zip#13] + +(18) CometFilter +Input [2]: [ca_address_sk#12, ca_zip#13] +Condition : isnotnull(ca_address_sk#12) + +(19) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Arguments: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(20) CometFilter +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(21) CometProject +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Arguments: [c_current_addr_sk#14], [c_current_addr_sk#14] + +(22) CometBroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: [c_current_addr_sk#14] + +(23) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#12, ca_zip#13] +Right output [1]: [c_current_addr_sk#14] +Arguments: [ca_address_sk#12], [c_current_addr_sk#14], Inner, BuildRight + +(24) CometProject +Input [3]: [ca_address_sk#12, ca_zip#13, c_current_addr_sk#14] +Arguments: [ca_zip#13], [ca_zip#13] + +(25) CometHashAggregate +Input [1]: [ca_zip#13] +Keys [1]: [ca_zip#13] +Functions [1]: [partial_count(1)] + +(26) CometExchange +Input [2]: [ca_zip#13, count#16] +Arguments: hashpartitioning(ca_zip#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [ca_zip#13, count#16] +Keys [1]: [ca_zip#13] +Functions [1]: [count(1)] + +(28) CometFilter +Input [2]: [ca_zip#17, cnt#18] +Condition : (cnt#18 > 10) + +(29) CometProject +Input [2]: [ca_zip#17, cnt#18] +Arguments: [ca_zip#17], [ca_zip#17] + +(30) CometBroadcastExchange +Input [1]: [ca_zip#17] +Arguments: [ca_zip#17] + +(31) CometBroadcastHashJoin +Left output [1]: [ca_zip#11] +Right output [1]: [ca_zip#17] +Arguments: [coalesce(ca_zip#11, ), isnull(ca_zip#11)], [coalesce(ca_zip#17, ), isnull(ca_zip#17)], LeftSemi, BuildRight + +(32) CometHashAggregate +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] + +(33) CometExchange +Input [1]: [ca_zip#11] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(34) CometHashAggregate +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] + +(35) CometBroadcastExchange +Input [1]: [ca_zip#11] +Arguments: [ca_zip#11] + +(36) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#2, s_store_name#8, s_zip#9] +Right output [1]: [ca_zip#11] +Arguments: [substr(s_zip#9, 1, 2)], [substr(ca_zip#11, 1, 2)], Inner, BuildRight + +(37) CometProject +Input [4]: [ss_net_profit#2, s_store_name#8, s_zip#9, ca_zip#11] +Arguments: [ss_net_profit#2, s_store_name#8], [ss_net_profit#2, s_store_name#8] + +(38) CometHashAggregate +Input [2]: [ss_net_profit#2, s_store_name#8] +Keys [1]: [s_store_name#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] + +(39) CometExchange +Input [2]: [s_store_name#8, sum#19] +Arguments: hashpartitioning(s_store_name#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(40) CometHashAggregate +Input [2]: [s_store_name#8, sum#19] +Keys [1]: [s_store_name#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] + +(41) CometTakeOrderedAndProject +Input [2]: [s_store_name#8, sum(ss_net_profit)#20] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#8 ASC NULLS FIRST], output=[s_store_name#8,sum(ss_net_profit)#20]), [s_store_name#8, sum(ss_net_profit)#20], 100, [s_store_name#8 ASC NULLS FIRST], [s_store_name#8, sum(ss_net_profit)#20] + +(42) ColumnarToRow [codegen id : 1] +Input [2]: [s_store_name#8, sum(ss_net_profit)#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6e915b2cee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + CometHashAggregate [s_store_name,sum(ss_net_profit),sum,sum(UnscaledValue(ss_net_profit))] + CometExchange [s_store_name] #1 + CometHashAggregate [s_store_name,sum,ss_net_profit] + CometProject [ss_net_profit,s_store_name] + CometBroadcastHashJoin [ss_net_profit,s_store_name,s_zip,ca_zip] + CometProject [ss_net_profit,s_store_name,s_zip] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,s_store_sk,s_store_name,s_zip] + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #3 + CometFilter [s_store_sk,s_store_name,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [ca_zip] #4 + CometHashAggregate [ca_zip] + CometExchange [ca_zip] #5 + CometHashAggregate [ca_zip] + CometBroadcastHashJoin [ca_zip,ca_zip] + CometProject [ca_zip] [ca_zip] + CometFilter [ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_zip] + CometBroadcastExchange [ca_zip] #6 + CometProject [ca_zip] + CometFilter [ca_zip,cnt] + CometHashAggregate [ca_zip,cnt,ca_zip,count,count(1)] + CometExchange [ca_zip] #7 + CometHashAggregate [ca_zip,count] + CometProject [ca_zip] + CometBroadcastHashJoin [ca_address_sk,ca_zip,c_current_addr_sk] + CometFilter [ca_address_sk,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_zip] + CometBroadcastExchange [c_current_addr_sk] #8 + CometProject [c_current_addr_sk] + CometFilter [c_current_addr_sk,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_current_addr_sk,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..467ca0bfe2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +* ColumnarToRow (42) ++- CometTakeOrderedAndProject (41) + +- CometHashAggregate (40) + +- CometExchange (39) + +- CometHashAggregate (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.store (9) + +- CometBroadcastExchange (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometBroadcastHashJoin (31) + :- CometProject (16) + : +- CometFilter (15) + : +- CometScan parquet spark_catalog.default.customer_address (14) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometFilter (28) + +- CometHashAggregate (27) + +- CometExchange (26) + +- CometHashAggregate (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometFilter (18) + : +- CometScan parquet spark_catalog.default.customer_address (17) + +- CometBroadcastExchange (22) + +- CometProject (21) + +- CometFilter (20) + +- CometScan parquet spark_catalog.default.customer (19) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Condition : (isnotnull(s_store_sk#7) AND isnotnull(s_zip#9)) + +(11) CometBroadcastExchange +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [s_store_sk#7, s_store_name#8, s_zip#9] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_store_sk#1, ss_net_profit#2] +Right output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [ss_net_profit#2, s_store_name#8, s_zip#9], [ss_net_profit#2, s_store_name#8, s_zip#9] + +(14) CometScan parquet spark_catalog.default.customer_address +Output [1]: [ca_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +ReadSchema: struct + +(15) CometFilter +Input [1]: [ca_zip#10] +Condition : (substr(ca_zip#10, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#10, 1, 5))) + +(16) CometProject +Input [1]: [ca_zip#10] +Arguments: [ca_zip#11], [substr(ca_zip#10, 1, 5) AS ca_zip#11] + +(17) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#12, ca_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(18) CometFilter +Input [2]: [ca_address_sk#12, ca_zip#13] +Condition : isnotnull(ca_address_sk#12) + +(19) CometScan parquet spark_catalog.default.customer +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(20) CometFilter +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(21) CometProject +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Arguments: [c_current_addr_sk#14], [c_current_addr_sk#14] + +(22) CometBroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: [c_current_addr_sk#14] + +(23) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#12, ca_zip#13] +Right output [1]: [c_current_addr_sk#14] +Arguments: [ca_address_sk#12], [c_current_addr_sk#14], Inner, BuildRight + +(24) CometProject +Input [3]: [ca_address_sk#12, ca_zip#13, c_current_addr_sk#14] +Arguments: [ca_zip#13], [ca_zip#13] + +(25) CometHashAggregate +Input [1]: [ca_zip#13] +Keys [1]: [ca_zip#13] +Functions [1]: [partial_count(1)] + +(26) CometExchange +Input [2]: [ca_zip#13, count#16] +Arguments: hashpartitioning(ca_zip#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [ca_zip#13, count#16] +Keys [1]: [ca_zip#13] +Functions [1]: [count(1)] + +(28) CometFilter +Input [2]: [ca_zip#17, cnt#18] +Condition : (cnt#18 > 10) + +(29) CometProject +Input [2]: [ca_zip#17, cnt#18] +Arguments: [ca_zip#17], [ca_zip#17] + +(30) CometBroadcastExchange +Input [1]: [ca_zip#17] +Arguments: [ca_zip#17] + +(31) CometBroadcastHashJoin +Left output [1]: [ca_zip#11] +Right output [1]: [ca_zip#17] +Arguments: [coalesce(ca_zip#11, ), isnull(ca_zip#11)], [coalesce(ca_zip#17, ), isnull(ca_zip#17)], LeftSemi, BuildRight + +(32) CometHashAggregate +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] + +(33) CometExchange +Input [1]: [ca_zip#11] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(34) CometHashAggregate +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] + +(35) CometBroadcastExchange +Input [1]: [ca_zip#11] +Arguments: [ca_zip#11] + +(36) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#2, s_store_name#8, s_zip#9] +Right output [1]: [ca_zip#11] +Arguments: [substr(s_zip#9, 1, 2)], [substr(ca_zip#11, 1, 2)], Inner, BuildRight + +(37) CometProject +Input [4]: [ss_net_profit#2, s_store_name#8, s_zip#9, ca_zip#11] +Arguments: [ss_net_profit#2, s_store_name#8], [ss_net_profit#2, s_store_name#8] + +(38) CometHashAggregate +Input [2]: [ss_net_profit#2, s_store_name#8] +Keys [1]: [s_store_name#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] + +(39) CometExchange +Input [2]: [s_store_name#8, sum#19] +Arguments: hashpartitioning(s_store_name#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(40) CometHashAggregate +Input [2]: [s_store_name#8, sum#19] +Keys [1]: [s_store_name#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] + +(41) CometTakeOrderedAndProject +Input [2]: [s_store_name#8, sum(ss_net_profit)#20] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[s_store_name#8 ASC NULLS FIRST], output=[s_store_name#8,sum(ss_net_profit)#20]), [s_store_name#8, sum(ss_net_profit)#20], 100, [s_store_name#8 ASC NULLS FIRST], [s_store_name#8, sum(ss_net_profit)#20] + +(42) ColumnarToRow [codegen id : 1] +Input [2]: [s_store_name#8, sum(ss_net_profit)#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a179594f50 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q8.native_iceberg_compat/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + CometHashAggregate [s_store_name,sum(ss_net_profit),sum,sum(UnscaledValue(ss_net_profit))] + CometExchange [s_store_name] #1 + CometHashAggregate [s_store_name,sum,ss_net_profit] + CometProject [ss_net_profit,s_store_name] + CometBroadcastHashJoin [ss_net_profit,s_store_name,s_zip,ca_zip] + CometProject [ss_net_profit,s_store_name,s_zip] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,s_store_sk,s_store_name,s_zip] + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #3 + CometFilter [s_store_sk,s_store_name,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [ca_zip] #4 + CometHashAggregate [ca_zip] + CometExchange [ca_zip] #5 + CometHashAggregate [ca_zip] + CometBroadcastHashJoin [ca_zip,ca_zip] + CometProject [ca_zip] [ca_zip] + CometFilter [ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_zip] + CometBroadcastExchange [ca_zip] #6 + CometProject [ca_zip] + CometFilter [ca_zip,cnt] + CometHashAggregate [ca_zip,cnt,ca_zip,count,count(1)] + CometExchange [ca_zip] #7 + CometHashAggregate [ca_zip,count] + CometProject [ca_zip] + CometBroadcastHashJoin [ca_address_sk,ca_zip,c_current_addr_sk] + CometFilter [ca_address_sk,ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_zip] + CometBroadcastExchange [c_current_addr_sk] #8 + CometProject [c_current_addr_sk] + CometFilter [c_current_addr_sk,c_preferred_cust_flag] + CometScan parquet spark_catalog.default.customer [c_current_addr_sk,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/explain.txt new file mode 100644 index 0000000000..91f9e5a088 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/explain.txt @@ -0,0 +1,374 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * HashAggregate (70) + +- Exchange (69) + +- * HashAggregate (68) + +- * Expand (67) + +- Union (66) + :- * HashAggregate (38) + : +- * ColumnarToRow (37) + : +- CometExchange (36) + : +- CometHashAggregate (35) + : +- CometProject (34) + : +- CometBroadcastHashJoin (33) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometProject (17) + : : : : +- CometBroadcastHashJoin (16) + : : : : :- CometProject (11) + : : : : : +- CometSortMergeJoin (10) + : : : : : :- CometSort (4) + : : : : : : +- CometExchange (3) + : : : : : : +- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : +- CometSort (9) + : : : : : +- CometExchange (8) + : : : : : +- CometProject (7) + : : : : : +- CometFilter (6) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + : : : : +- CometBroadcastExchange (15) + : : : : +- CometProject (14) + : : : : +- CometFilter (13) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (12) + : : : +- CometBroadcastExchange (20) + : : : +- CometFilter (19) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (18) + : : +- CometBroadcastExchange (26) + : : +- CometProject (25) + : : +- CometFilter (24) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (23) + : +- CometBroadcastExchange (32) + : +- CometProject (31) + : +- CometFilter (30) + : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (29) + :- * HashAggregate (62) + : +- * ColumnarToRow (61) + : +- CometExchange (60) + : +- CometHashAggregate (59) + : +- CometProject (58) + : +- CometBroadcastHashJoin (57) + : :- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometProject (52) + : : : +- CometBroadcastHashJoin (51) + : : : :- CometProject (49) + : : : : +- CometBroadcastHashJoin (48) + : : : : :- CometProject (46) + : : : : : +- CometSortMergeJoin (45) + : : : : : :- CometSort (42) + : : : : : : +- CometExchange (41) + : : : : : : +- CometFilter (40) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (39) + : : : : : +- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- ReusedExchange (47) + : : : +- ReusedExchange (50) + : : +- ReusedExchange (53) + : +- ReusedExchange (56) + +- * HashAggregate (65) + +- * ColumnarToRow (64) + +- ReusedExchange (63) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(6) CometFilter +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) CometProject +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(8) CometExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_ticket_number#4], [sr_item_sk#8, sr_ticket_number#9], LeftOuter + +(11) CometProject +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] + +(12) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13, d_date#14] + +(13) CometFilter +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 2000-08-23)) AND (d_date#14 <= 2000-09-22)) AND isnotnull(d_date_sk#13)) + +(14) CometProject +Input [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(16) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#7], [d_date_sk#13], Inner, BuildRight + +(17) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] + +(18) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(19) CometFilter +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(20) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(21) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Right output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [ss_store_sk#2], [s_store_sk#15], Inner, BuildRight + +(22) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(23) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17, i_current_price#18] + +(24) CometFilter +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(25) CometProject +Input [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17], [i_item_sk#17] + +(26) CometBroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: [i_item_sk#17] + +(27) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [i_item_sk#17] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] +Arguments: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(29) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19, p_channel_tv#20] + +(30) CometFilter +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(31) CometProject +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19], [p_promo_sk#19] + +(32) CometBroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: [p_promo_sk#19] + +(33) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [p_promo_sk#19] +Arguments: [ss_promo_sk#3], [p_promo_sk#19], Inner, BuildRight + +(34) CometProject +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] +Arguments: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(35) CometHashAggregate +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] + +(36) CometExchange +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(37) ColumnarToRow [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] + +(38) HashAggregate [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#26, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#26,17,2) AS sales#29, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27 AS returns#30, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28 AS profit#31, store channel AS channel#32, concat(store, s_store_id#16) AS id#33] + +(39) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] + +(40) CometFilter +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Condition : ((isnotnull(cs_catalog_page_sk#34) AND isnotnull(cs_item_sk#35)) AND isnotnull(cs_promo_sk#36)) + +(41) CometExchange +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: hashpartitioning(cs_item_sk#35, cs_order_number#37, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(42) CometSort +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40], [cs_item_sk#35 ASC NULLS FIRST, cs_order_number#37 ASC NULLS FIRST] + +(43) ReusedExchange [Reuses operator id: 8] +Output [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] + +(44) CometSort +Input [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44], [cr_item_sk#41 ASC NULLS FIRST, cr_order_number#42 ASC NULLS FIRST] + +(45) CometSortMergeJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Right output [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_item_sk#35, cs_order_number#37], [cr_item_sk#41, cr_order_number#42], LeftOuter + +(46) CometProject +Input [11]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] + +(47) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#45] + +(48) CometBroadcastHashJoin +Left output [8]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] +Right output [1]: [d_date_sk#45] +Arguments: [cs_sold_date_sk#40], [d_date_sk#45], Inner, BuildRight + +(49) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44, d_date_sk#45] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] + +(50) ReusedExchange [Reuses operator id: 20] +Output [2]: [cp_catalog_page_sk#46, cp_catalog_page_id#47] + +(51) CometBroadcastHashJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] +Right output [2]: [cp_catalog_page_sk#46, cp_catalog_page_id#47] +Arguments: [cs_catalog_page_sk#34], [cp_catalog_page_sk#46], Inner, BuildRight + +(52) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_sk#46, cp_catalog_page_id#47] +Arguments: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47], [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] + +(53) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#48] + +(54) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] +Right output [1]: [i_item_sk#48] +Arguments: [cs_item_sk#35], [i_item_sk#48], Inner, BuildRight + +(55) CometProject +Input [8]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47, i_item_sk#48] +Arguments: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47], [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] + +(56) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#49] + +(57) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] +Right output [1]: [p_promo_sk#49] +Arguments: [cs_promo_sk#36], [p_promo_sk#49], Inner, BuildRight + +(58) CometProject +Input [7]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47, p_promo_sk#49] +Arguments: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47], [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] + +(59) CometHashAggregate +Input [5]: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] +Keys [1]: [cp_catalog_page_id#47] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#38)), partial_sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] + +(60) CometExchange +Input [6]: [cp_catalog_page_id#47, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Arguments: hashpartitioning(cp_catalog_page_id#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(61) ColumnarToRow [codegen id : 2] +Input [6]: [cp_catalog_page_id#47, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] + +(62) HashAggregate [codegen id : 2] +Input [6]: [cp_catalog_page_id#47, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Keys [1]: [cp_catalog_page_id#47] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#38)), sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#38))#55, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#56, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#57] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#38))#55,17,2) AS sales#58, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#56 AS returns#59, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#57 AS profit#60, catalog channel AS channel#61, concat(catalog_page, cp_catalog_page_id#47) AS id#62] + +(63) ReusedExchange [Reuses operator id: 36] +Output [6]: [web_site_id#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(64) ColumnarToRow [codegen id : 3] +Input [6]: [web_site_id#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(65) HashAggregate [codegen id : 3] +Input [6]: [web_site_id#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [web_site_id#63] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(coalesce(cast(wr_return_amt#70 as decimal(12,2)), 0.00)), sum((ws_net_profit#71 - coalesce(cast(wr_net_loss#72 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#69))#73, sum(coalesce(cast(wr_return_amt#70 as decimal(12,2)), 0.00))#74, sum((ws_net_profit#71 - coalesce(cast(wr_net_loss#72 as decimal(12,2)), 0.00)))#75] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#73,17,2) AS sales#76, sum(coalesce(cast(wr_return_amt#70 as decimal(12,2)), 0.00))#74 AS returns#77, sum((ws_net_profit#71 - coalesce(cast(wr_net_loss#72 as decimal(12,2)), 0.00)))#75 AS profit#78, web channel AS channel#79, concat(web_site, web_site_id#63) AS id#80] + +(66) Union + +(67) Expand [codegen id : 4] +Input [5]: [sales#29, returns#30, profit#31, channel#32, id#33] +Arguments: [[sales#29, returns#30, profit#31, channel#32, id#33, 0], [sales#29, returns#30, profit#31, channel#32, null, 1], [sales#29, returns#30, profit#31, null, null, 3]], [sales#29, returns#30, profit#31, channel#81, id#82, spark_grouping_id#83] + +(68) HashAggregate [codegen id : 4] +Input [6]: [sales#29, returns#30, profit#31, channel#81, id#82, spark_grouping_id#83] +Keys [3]: [channel#81, id#82, spark_grouping_id#83] +Functions [3]: [partial_sum(sales#29), partial_sum(returns#30), partial_sum(profit#31)] +Aggregate Attributes [6]: [sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89] +Results [9]: [channel#81, id#82, spark_grouping_id#83, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95] + +(69) Exchange +Input [9]: [channel#81, id#82, spark_grouping_id#83, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Arguments: hashpartitioning(channel#81, id#82, spark_grouping_id#83, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(70) HashAggregate [codegen id : 5] +Input [9]: [channel#81, id#82, spark_grouping_id#83, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Keys [3]: [channel#81, id#82, spark_grouping_id#83] +Functions [3]: [sum(sales#29), sum(returns#30), sum(profit#31)] +Aggregate Attributes [3]: [sum(sales#29)#96, sum(returns#30)#97, sum(profit#31)#98] +Results [5]: [channel#81, id#82, sum(sales#29)#96 AS sales#99, sum(returns#30)#97 AS returns#100, sum(profit#31)#98 AS profit#101] + +(71) TakeOrderedAndProject +Input [5]: [channel#81, id#82, sales#99, returns#100, profit#101] +Arguments: 100, [channel#81 ASC NULLS FIRST, id#82 ASC NULLS FIRST], [channel#81, id#82, sales#99, returns#100, profit#101] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b4a958104e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_datafusion/simplified.txt @@ -0,0 +1,81 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (5) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #2 + CometHashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] + CometProject [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,p_promo_sk] + CometProject [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,i_item_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + CometSortMergeJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometSort [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometExchange [ss_item_sk,ss_ticket_number] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometExchange [sr_item_sk,sr_ticket_number] #4 + CometProject [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #6 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk] #7 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price] + CometBroadcastExchange [p_promo_sk] #8 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_tv] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_tv] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [cp_catalog_page_id] #9 + CometHashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] + CometProject [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,p_promo_sk] + CometProject [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,i_item_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss,d_date_sk] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + CometSortMergeJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometSort [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometExchange [cs_item_sk,cs_order_number] #10 + CometFilter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] #4 + ReusedExchange [d_date_sk] #5 + ReusedExchange [cp_catalog_page_sk,cp_catalog_page_id] #6 + ReusedExchange [i_item_sk] #7 + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + ReusedExchange [web_site_id,sum,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9e095dbc96 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/explain.txt @@ -0,0 +1,574 @@ +== Physical Plan == +TakeOrderedAndProject (102) ++- * HashAggregate (101) + +- Exchange (100) + +- * HashAggregate (99) + +- * Expand (98) + +- Union (97) + :- * HashAggregate (38) + : +- * ColumnarToRow (37) + : +- CometExchange (36) + : +- CometHashAggregate (35) + : +- CometProject (34) + : +- CometBroadcastHashJoin (33) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometProject (17) + : : : : +- CometBroadcastHashJoin (16) + : : : : :- CometProject (11) + : : : : : +- CometSortMergeJoin (10) + : : : : : :- CometSort (4) + : : : : : : +- CometExchange (3) + : : : : : : +- CometFilter (2) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : +- CometSort (9) + : : : : : +- CometExchange (8) + : : : : : +- CometProject (7) + : : : : : +- CometFilter (6) + : : : : : +- CometScan parquet spark_catalog.default.store_returns (5) + : : : : +- CometBroadcastExchange (15) + : : : : +- CometProject (14) + : : : : +- CometFilter (13) + : : : : +- CometScan parquet spark_catalog.default.date_dim (12) + : : : +- CometBroadcastExchange (20) + : : : +- CometFilter (19) + : : : +- CometScan parquet spark_catalog.default.store (18) + : : +- CometBroadcastExchange (26) + : : +- CometProject (25) + : : +- CometFilter (24) + : : +- CometScan parquet spark_catalog.default.item (23) + : +- CometBroadcastExchange (32) + : +- CometProject (31) + : +- CometFilter (30) + : +- CometScan parquet spark_catalog.default.promotion (29) + :- * HashAggregate (67) + : +- * ColumnarToRow (66) + : +- CometExchange (65) + : +- CometHashAggregate (64) + : +- CometProject (63) + : +- CometBroadcastHashJoin (62) + : :- CometProject (60) + : : +- CometBroadcastHashJoin (59) + : : :- CometProject (57) + : : : +- CometBroadcastHashJoin (56) + : : : :- CometProject (52) + : : : : +- CometBroadcastHashJoin (51) + : : : : :- CometProject (49) + : : : : : +- CometSortMergeJoin (48) + : : : : : :- CometSort (42) + : : : : : : +- CometExchange (41) + : : : : : : +- CometFilter (40) + : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (39) + : : : : : +- CometSort (47) + : : : : : +- CometExchange (46) + : : : : : +- CometProject (45) + : : : : : +- CometFilter (44) + : : : : : +- CometScan parquet spark_catalog.default.catalog_returns (43) + : : : : +- ReusedExchange (50) + : : : +- CometBroadcastExchange (55) + : : : +- CometFilter (54) + : : : +- CometScan parquet spark_catalog.default.catalog_page (53) + : : +- ReusedExchange (58) + : +- ReusedExchange (61) + +- * HashAggregate (96) + +- * ColumnarToRow (95) + +- CometExchange (94) + +- CometHashAggregate (93) + +- CometProject (92) + +- CometBroadcastHashJoin (91) + :- CometProject (89) + : +- CometBroadcastHashJoin (88) + : :- CometProject (86) + : : +- CometBroadcastHashJoin (85) + : : :- CometProject (81) + : : : +- CometBroadcastHashJoin (80) + : : : :- CometProject (78) + : : : : +- CometSortMergeJoin (77) + : : : : :- CometSort (71) + : : : : : +- CometExchange (70) + : : : : : +- CometFilter (69) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (68) + : : : : +- CometSort (76) + : : : : +- CometExchange (75) + : : : : +- CometProject (74) + : : : : +- CometFilter (73) + : : : : +- CometScan parquet spark_catalog.default.web_returns (72) + : : : +- ReusedExchange (79) + : : +- CometBroadcastExchange (84) + : : +- CometFilter (83) + : : +- CometScan parquet spark_catalog.default.web_site (82) + : +- ReusedExchange (87) + +- ReusedExchange (90) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST] + +(5) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(6) CometFilter +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) CometProject +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(8) CometExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_ticket_number#4], [sr_item_sk#8, sr_ticket_number#9], LeftOuter + +(11) CometProject +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] + +(12) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) CometFilter +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 2000-08-23)) AND (d_date#14 <= 2000-09-22)) AND isnotnull(d_date_sk#13)) + +(14) CometProject +Input [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(16) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#7], [d_date_sk#13], Inner, BuildRight + +(17) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] + +(18) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_store_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(20) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(21) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Right output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [ss_store_sk#2], [s_store_sk#15], Inner, BuildRight + +(22) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(23) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_current_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) CometFilter +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(25) CometProject +Input [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17], [i_item_sk#17] + +(26) CometBroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: [i_item_sk#17] + +(27) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [i_item_sk#17] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] +Arguments: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(29) CometScan parquet spark_catalog.default.promotion +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(31) CometProject +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19], [p_promo_sk#19] + +(32) CometBroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: [p_promo_sk#19] + +(33) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [p_promo_sk#19] +Arguments: [ss_promo_sk#3], [p_promo_sk#19], Inner, BuildRight + +(34) CometProject +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] +Arguments: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(35) CometHashAggregate +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] + +(36) CometExchange +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(37) ColumnarToRow [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] + +(38) HashAggregate [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#26, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#26,17,2) AS sales#29, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27 AS returns#30, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28 AS profit#31, store channel AS channel#32, concat(store, s_store_id#16) AS id#33] + +(39) CometScan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#40)] +PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(40) CometFilter +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Condition : ((isnotnull(cs_catalog_page_sk#34) AND isnotnull(cs_item_sk#35)) AND isnotnull(cs_promo_sk#36)) + +(41) CometExchange +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: hashpartitioning(cs_item_sk#35, cs_order_number#37, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(42) CometSort +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40], [cs_item_sk#35 ASC NULLS FIRST, cs_order_number#37 ASC NULLS FIRST] + +(43) CometScan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44, cr_returned_date_sk#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(44) CometFilter +Input [5]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44, cr_returned_date_sk#45] +Condition : (isnotnull(cr_item_sk#41) AND isnotnull(cr_order_number#42)) + +(45) CometProject +Input [5]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44, cr_returned_date_sk#45] +Arguments: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44], [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] + +(46) CometExchange +Input [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: hashpartitioning(cr_item_sk#41, cr_order_number#42, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(47) CometSort +Input [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44], [cr_item_sk#41 ASC NULLS FIRST, cr_order_number#42 ASC NULLS FIRST] + +(48) CometSortMergeJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Right output [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_item_sk#35, cs_order_number#37], [cr_item_sk#41, cr_order_number#42], LeftOuter + +(49) CometProject +Input [11]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] + +(50) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#46] + +(51) CometBroadcastHashJoin +Left output [8]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] +Right output [1]: [d_date_sk#46] +Arguments: [cs_sold_date_sk#40], [d_date_sk#46], Inner, BuildRight + +(52) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44, d_date_sk#46] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] + +(53) CometScan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(54) CometFilter +Input [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Condition : isnotnull(cp_catalog_page_sk#47) + +(55) CometBroadcastExchange +Input [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Arguments: [cp_catalog_page_sk#47, cp_catalog_page_id#48] + +(56) CometBroadcastHashJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] +Right output [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Arguments: [cs_catalog_page_sk#34], [cp_catalog_page_sk#47], Inner, BuildRight + +(57) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_sk#47, cp_catalog_page_id#48] +Arguments: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48], [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] + +(58) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#49] + +(59) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] +Right output [1]: [i_item_sk#49] +Arguments: [cs_item_sk#35], [i_item_sk#49], Inner, BuildRight + +(60) CometProject +Input [8]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48, i_item_sk#49] +Arguments: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48], [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] + +(61) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#50] + +(62) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] +Right output [1]: [p_promo_sk#50] +Arguments: [cs_promo_sk#36], [p_promo_sk#50], Inner, BuildRight + +(63) CometProject +Input [7]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48, p_promo_sk#50] +Arguments: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48], [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] + +(64) CometHashAggregate +Input [5]: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] +Keys [1]: [cp_catalog_page_id#48] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#38)), partial_sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] + +(65) CometExchange +Input [6]: [cp_catalog_page_id#48, sum#51, sum#52, isEmpty#53, sum#54, isEmpty#55] +Arguments: hashpartitioning(cp_catalog_page_id#48, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(66) ColumnarToRow [codegen id : 2] +Input [6]: [cp_catalog_page_id#48, sum#51, sum#52, isEmpty#53, sum#54, isEmpty#55] + +(67) HashAggregate [codegen id : 2] +Input [6]: [cp_catalog_page_id#48, sum#51, sum#52, isEmpty#53, sum#54, isEmpty#55] +Keys [1]: [cp_catalog_page_id#48] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#38)), sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#38))#56, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#57, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#58] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#38))#56,17,2) AS sales#59, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#57 AS returns#60, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#58 AS profit#61, catalog channel AS channel#62, concat(catalog_page, cp_catalog_page_id#48) AS id#63] + +(68) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#70)] +PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(69) CometFilter +Input [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Condition : ((isnotnull(ws_web_site_sk#65) AND isnotnull(ws_item_sk#64)) AND isnotnull(ws_promo_sk#66)) + +(70) CometExchange +Input [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Arguments: hashpartitioning(ws_item_sk#64, ws_order_number#67, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(71) CometSort +Input [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Arguments: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70], [ws_item_sk#64 ASC NULLS FIRST, ws_order_number#67 ASC NULLS FIRST] + +(72) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74, wr_returned_date_sk#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(73) CometFilter +Input [5]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74, wr_returned_date_sk#75] +Condition : (isnotnull(wr_item_sk#71) AND isnotnull(wr_order_number#72)) + +(74) CometProject +Input [5]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74, wr_returned_date_sk#75] +Arguments: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74], [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] + +(75) CometExchange +Input [4]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: hashpartitioning(wr_item_sk#71, wr_order_number#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(76) CometSort +Input [4]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74], [wr_item_sk#71 ASC NULLS FIRST, wr_order_number#72 ASC NULLS FIRST] + +(77) CometSortMergeJoin +Left output [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Right output [4]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: [ws_item_sk#64, ws_order_number#67], [wr_item_sk#71, wr_order_number#72], LeftOuter + +(78) CometProject +Input [11]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74], [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74] + +(79) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#76] + +(80) CometBroadcastHashJoin +Left output [8]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74] +Right output [1]: [d_date_sk#76] +Arguments: [ws_sold_date_sk#70], [d_date_sk#76], Inner, BuildRight + +(81) CometProject +Input [9]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74, d_date_sk#76] +Arguments: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74], [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74] + +(82) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#77, web_site_id#78] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(83) CometFilter +Input [2]: [web_site_sk#77, web_site_id#78] +Condition : isnotnull(web_site_sk#77) + +(84) CometBroadcastExchange +Input [2]: [web_site_sk#77, web_site_id#78] +Arguments: [web_site_sk#77, web_site_id#78] + +(85) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74] +Right output [2]: [web_site_sk#77, web_site_id#78] +Arguments: [ws_web_site_sk#65], [web_site_sk#77], Inner, BuildRight + +(86) CometProject +Input [9]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_sk#77, web_site_id#78] +Arguments: [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78], [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] + +(87) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#79] + +(88) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] +Right output [1]: [i_item_sk#79] +Arguments: [ws_item_sk#64], [i_item_sk#79], Inner, BuildRight + +(89) CometProject +Input [8]: [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78, i_item_sk#79] +Arguments: [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78], [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] + +(90) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#80] + +(91) CometBroadcastHashJoin +Left output [6]: [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] +Right output [1]: [p_promo_sk#80] +Arguments: [ws_promo_sk#66], [p_promo_sk#80], Inner, BuildRight + +(92) CometProject +Input [7]: [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78, p_promo_sk#80] +Arguments: [ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78], [ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] + +(93) CometHashAggregate +Input [5]: [ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] +Keys [1]: [web_site_id#78] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#68)), partial_sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00)), partial_sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))] + +(94) CometExchange +Input [6]: [web_site_id#78, sum#81, sum#82, isEmpty#83, sum#84, isEmpty#85] +Arguments: hashpartitioning(web_site_id#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(95) ColumnarToRow [codegen id : 3] +Input [6]: [web_site_id#78, sum#81, sum#82, isEmpty#83, sum#84, isEmpty#85] + +(96) HashAggregate [codegen id : 3] +Input [6]: [web_site_id#78, sum#81, sum#82, isEmpty#83, sum#84, isEmpty#85] +Keys [1]: [web_site_id#78] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#68)), sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00)), sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#68))#86, sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00))#87, sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))#88] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#68))#86,17,2) AS sales#89, sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00))#87 AS returns#90, sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))#88 AS profit#91, web channel AS channel#92, concat(web_site, web_site_id#78) AS id#93] + +(97) Union + +(98) Expand [codegen id : 4] +Input [5]: [sales#29, returns#30, profit#31, channel#32, id#33] +Arguments: [[sales#29, returns#30, profit#31, channel#32, id#33, 0], [sales#29, returns#30, profit#31, channel#32, null, 1], [sales#29, returns#30, profit#31, null, null, 3]], [sales#29, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] + +(99) HashAggregate [codegen id : 4] +Input [6]: [sales#29, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [partial_sum(sales#29), partial_sum(returns#30), partial_sum(profit#31)] +Aggregate Attributes [6]: [sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Results [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] + +(100) Exchange +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Arguments: hashpartitioning(channel#94, id#95, spark_grouping_id#96, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(101) HashAggregate [codegen id : 5] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [sum(sales#29), sum(returns#30), sum(profit#31)] +Aggregate Attributes [3]: [sum(sales#29)#109, sum(returns#30)#110, sum(profit#31)#111] +Results [5]: [channel#94, id#95, sum(sales#29)#109 AS sales#112, sum(returns#30)#110 AS returns#113, sum(profit#31)#111 AS profit#114] + +(102) TakeOrderedAndProject +Input [5]: [channel#94, id#95, sales#112, returns#113, profit#114] +Arguments: 100, [channel#94 ASC NULLS FIRST, id#95 ASC NULLS FIRST], [channel#94, id#95, sales#112, returns#113, profit#114] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d962e11056 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q80.native_iceberg_compat/simplified.txt @@ -0,0 +1,112 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (5) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #2 + CometHashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] + CometProject [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,p_promo_sk] + CometProject [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,i_item_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + CometSortMergeJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometSort [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometExchange [ss_item_sk,ss_ticket_number] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometExchange [sr_item_sk,sr_ticket_number] #4 + CometProject [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #6 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk] #7 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_current_price] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price] + CometBroadcastExchange [p_promo_sk] #8 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_tv] + CometScan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [cp_catalog_page_id] #9 + CometHashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] + CometProject [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,p_promo_sk] + CometProject [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,i_item_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss,d_date_sk] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + CometSortMergeJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometSort [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometExchange [cs_item_sk,cs_order_number] #10 + CometFilter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometExchange [cr_item_sk,cr_order_number] #11 + CometProject [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometFilter [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [cp_catalog_page_sk,cp_catalog_page_id] #12 + CometFilter [cp_catalog_page_sk,cp_catalog_page_id] + CometScan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [i_item_sk] #7 + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [web_site_id] #13 + CometHashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] + CometProject [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + CometBroadcastHashJoin [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id,p_promo_sk] + CometProject [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + CometBroadcastHashJoin [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id,i_item_sk] + CometProject [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + CometBroadcastHashJoin [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_sk,web_site_id] + CometProject [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + CometBroadcastHashJoin [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss,d_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss] + CometSortMergeJoin [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + CometSort [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometExchange [ws_item_sk,ws_order_number] #14 + CometFilter [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometSort [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + CometExchange [wr_item_sk,wr_order_number] #15 + CometProject [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + CometFilter [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [web_site_sk,web_site_id] #16 + CometFilter [web_site_sk,web_site_id] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + ReusedExchange [i_item_sk] #7 + ReusedExchange [p_promo_sk] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/explain.txt new file mode 100644 index 0000000000..9e65a13766 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometTakeOrderedAndProject (46) + +- CometProject (45) + +- CometBroadcastHashJoin (44) + :- CometProject (40) + : +- CometBroadcastHashJoin (39) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometFilter (17) + : : : +- CometHashAggregate (16) + : : : +- CometExchange (15) + : : : +- CometHashAggregate (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : : +- CometBroadcastExchange (33) + : : +- CometFilter (32) + : : +- CometHashAggregate (31) + : : +- CometExchange (30) + : : +- CometHashAggregate (29) + : : +- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometFilter (19) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (18) + : : : +- ReusedExchange (20) + : : +- ReusedExchange (23) + : +- CometBroadcastExchange (38) + : +- CometFilter (37) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (36) + +- CometBroadcastExchange (43) + +- CometFilter (42) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (41) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Arguments: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] + +(2) CometFilter +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : (isnotnull(cr_returning_addr_sk#2) AND isnotnull(cr_returning_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5, d_year#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [cr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] +Arguments: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3], [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(10) CometFilter +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(12) CometBroadcastHashJoin +Left output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Right output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [cr_returning_addr_sk#2], [ca_address_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] +Arguments: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8], [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] + +(14) CometHashAggregate +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] + +(15) CometExchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#9] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#9] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] + +(17) CometFilter +Input [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(18) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] +Arguments: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] + +(19) CometFilter +Input [4]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] +Condition : isnotnull(cr_returning_addr_sk#14) + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#17] + +(21) CometBroadcastHashJoin +Left output [4]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] +Right output [1]: [d_date_sk#17] +Arguments: [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + +(22) CometProject +Input [5]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16, d_date_sk#17] +Arguments: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15], [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15] + +(23) ReusedExchange [Reuses operator id: 11] +Output [2]: [ca_address_sk#18, ca_state#19] + +(24) CometBroadcastHashJoin +Left output [3]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15] +Right output [2]: [ca_address_sk#18, ca_state#19] +Arguments: [cr_returning_addr_sk#14], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [5]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, ca_address_sk#18, ca_state#19] +Arguments: [cr_returning_customer_sk#13, cr_return_amt_inc_tax#15, ca_state#19], [cr_returning_customer_sk#13, cr_return_amt_inc_tax#15, ca_state#19] + +(26) CometHashAggregate +Input [3]: [cr_returning_customer_sk#13, cr_return_amt_inc_tax#15, ca_state#19] +Keys [2]: [cr_returning_customer_sk#13, ca_state#19] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#15))] + +(27) CometExchange +Input [3]: [cr_returning_customer_sk#13, ca_state#19, sum#20] +Arguments: hashpartitioning(cr_returning_customer_sk#13, ca_state#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [3]: [cr_returning_customer_sk#13, ca_state#19, sum#20] +Keys [2]: [cr_returning_customer_sk#13, ca_state#19] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#15))] + +(29) CometHashAggregate +Input [2]: [ctr_state#21, ctr_total_return#22] +Keys [1]: [ctr_state#21] +Functions [1]: [partial_avg(ctr_total_return#22)] + +(30) CometExchange +Input [3]: [ctr_state#21, sum#23, count#24] +Arguments: hashpartitioning(ctr_state#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(31) CometHashAggregate +Input [3]: [ctr_state#21, sum#23, count#24] +Keys [1]: [ctr_state#21] +Functions [1]: [avg(ctr_total_return#22)] + +(32) CometFilter +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#25) + +(33) CometBroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] + +(34) CometBroadcastHashJoin +Left output [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Right output [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_state#11], [ctr_state#21], Inner, (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#25), BuildRight + +(35) CometProject +Input [5]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_customer_sk#10, ctr_total_return#12], [ctr_customer_sk#10, ctr_total_return#12] + +(36) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] + +(37) CometFilter +Input [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Condition : (isnotnull(c_customer_sk#26) AND isnotnull(c_current_addr_sk#28)) + +(38) CometBroadcastExchange +Input [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] + +(39) CometBroadcastHashJoin +Left output [2]: [ctr_customer_sk#10, ctr_total_return#12] +Right output [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [ctr_customer_sk#10], [c_customer_sk#26], Inner, BuildRight + +(40) CometProject +Input [8]: [ctr_customer_sk#10, ctr_total_return#12, c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31], [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] + +(41) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] + +(42) CometFilter +Input [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Condition : ((isnotnull(ca_state#39) AND (ca_state#39 = GA)) AND isnotnull(ca_address_sk#32)) + +(43) CometBroadcastExchange +Input [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] + +(44) CometBroadcastHashJoin +Left output [6]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Right output [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [c_current_addr_sk#28], [ca_address_sk#32], Inner, BuildRight + +(45) CometProject +Input [18]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] + +(46) CometTakeOrderedAndProject +Input [16]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#27 ASC NULLS FIRST,c_salutation#29 ASC NULLS FIRST,c_first_name#30 ASC NULLS FIRST,c_last_name#31 ASC NULLS FIRST,ca_street_number#33 ASC NULLS FIRST,ca_street_name#34 ASC NULLS FIRST,ca_street_type#35 ASC NULLS FIRST,ca_suite_number#36 ASC NULLS FIRST,ca_city#37 ASC NULLS FIRST,ca_county#38 ASC NULLS FIRST,ca_state#39 ASC NULLS FIRST,ca_zip#40 ASC NULLS FIRST,ca_country#41 ASC NULLS FIRST,ca_gmt_offset#42 ASC NULLS FIRST,ca_location_type#43 ASC NULLS FIRST,ctr_total_return#12 ASC NULLS FIRST], output=[c_customer_id#27,c_salutation#29,c_first_name#30,c_last_name#31,ca_street_number#33,ca_street_name#34,ca_street_type#35,ca_suite_number#36,ca_city#37,ca_county#38,ca_state#39,ca_zip#40,ca_country#41,ca_gmt_offset#42,ca_location_type#43,ctr_total_return#12]), [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12], 100, [c_customer_id#27 ASC NULLS FIRST, c_salutation#29 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, c_last_name#31 ASC NULLS FIRST, ca_street_number#33 ASC NULLS FIRST, ca_street_name#34 ASC NULLS FIRST, ca_street_type#35 ASC NULLS FIRST, ca_suite_number#36 ASC NULLS FIRST, ca_city#37 ASC NULLS FIRST, ca_county#38 ASC NULLS FIRST, ca_state#39 ASC NULLS FIRST, ca_zip#40 ASC NULLS FIRST, ca_country#41 ASC NULLS FIRST, ca_gmt_offset#42 ASC NULLS FIRST, ca_location_type#43 ASC NULLS FIRST, ctr_total_return#12 ASC NULLS FIRST], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] + +(47) ColumnarToRow [codegen id : 1] +Input [16]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/simplified.txt new file mode 100644 index 0000000000..807434f4a6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + CometProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + CometBroadcastHashJoin [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + CometProject [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometBroadcastHashJoin [ctr_customer_sk,ctr_total_return,c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometProject [ctr_customer_sk,ctr_total_return] + CometBroadcastHashJoin [ctr_customer_sk,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2),ctr_state] + CometFilter [ctr_customer_sk,ctr_state,ctr_total_return] + CometHashAggregate [ctr_customer_sk,ctr_state,ctr_total_return,cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + CometExchange [cr_returning_customer_sk,ca_state] #1 + CometHashAggregate [cr_returning_customer_sk,ca_state,sum,cr_return_amt_inc_tax] + CometProject [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,ca_address_sk,ca_state] + CometProject [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk,d_date_sk] + CometFilter [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_state] #4 + CometFilter [(avg(ctr_total_return) * 1.2),ctr_state] + CometHashAggregate [(avg(ctr_total_return) * 1.2),ctr_state,sum,count,avg(ctr_total_return)] + CometExchange [ctr_state] #5 + CometHashAggregate [ctr_state,sum,count,ctr_total_return] + CometHashAggregate [ctr_state,ctr_total_return,cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + CometExchange [cr_returning_customer_sk,ca_state] #6 + CometHashAggregate [cr_returning_customer_sk,ca_state,sum,cr_return_amt_inc_tax] + CometProject [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,ca_address_sk,ca_state] + CometProject [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk,d_date_sk] + CometFilter [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + ReusedExchange [ca_address_sk,ca_state] #3 + CometBroadcastExchange [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] #7 + CometFilter [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometBroadcastExchange [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] #8 + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..180eb62446 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/explain.txt @@ -0,0 +1,268 @@ +== Physical Plan == +* ColumnarToRow (47) ++- CometTakeOrderedAndProject (46) + +- CometProject (45) + +- CometBroadcastHashJoin (44) + :- CometProject (40) + : +- CometBroadcastHashJoin (39) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometFilter (17) + : : : +- CometHashAggregate (16) + : : : +- CometExchange (15) + : : : +- CometHashAggregate (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.catalog_returns (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.customer_address (9) + : : +- CometBroadcastExchange (33) + : : +- CometFilter (32) + : : +- CometHashAggregate (31) + : : +- CometExchange (30) + : : +- CometHashAggregate (29) + : : +- CometHashAggregate (28) + : : +- CometExchange (27) + : : +- CometHashAggregate (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometFilter (19) + : : : : +- CometScan parquet spark_catalog.default.catalog_returns (18) + : : : +- ReusedExchange (20) + : : +- ReusedExchange (23) + : +- CometBroadcastExchange (38) + : +- CometFilter (37) + : +- CometScan parquet spark_catalog.default.customer (36) + +- CometBroadcastExchange (43) + +- CometFilter (42) + +- CometScan parquet spark_catalog.default.customer_address (41) + + +(1) CometScan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#4)] +PushedFilters: [IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : (isnotnull(cr_returning_addr_sk#2) AND isnotnull(cr_returning_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [cr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] +Arguments: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3], [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] + +(9) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(12) CometBroadcastHashJoin +Left output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Right output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [cr_returning_addr_sk#2], [ca_address_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] +Arguments: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8], [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] + +(14) CometHashAggregate +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] + +(15) CometExchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#9] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#9] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] + +(17) CometFilter +Input [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(18) CometScan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#16)] +PushedFilters: [IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct + +(19) CometFilter +Input [4]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] +Condition : isnotnull(cr_returning_addr_sk#14) + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#17] + +(21) CometBroadcastHashJoin +Left output [4]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16] +Right output [1]: [d_date_sk#17] +Arguments: [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + +(22) CometProject +Input [5]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, cr_returned_date_sk#16, d_date_sk#17] +Arguments: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15], [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15] + +(23) ReusedExchange [Reuses operator id: 11] +Output [2]: [ca_address_sk#18, ca_state#19] + +(24) CometBroadcastHashJoin +Left output [3]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15] +Right output [2]: [ca_address_sk#18, ca_state#19] +Arguments: [cr_returning_addr_sk#14], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [5]: [cr_returning_customer_sk#13, cr_returning_addr_sk#14, cr_return_amt_inc_tax#15, ca_address_sk#18, ca_state#19] +Arguments: [cr_returning_customer_sk#13, cr_return_amt_inc_tax#15, ca_state#19], [cr_returning_customer_sk#13, cr_return_amt_inc_tax#15, ca_state#19] + +(26) CometHashAggregate +Input [3]: [cr_returning_customer_sk#13, cr_return_amt_inc_tax#15, ca_state#19] +Keys [2]: [cr_returning_customer_sk#13, ca_state#19] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#15))] + +(27) CometExchange +Input [3]: [cr_returning_customer_sk#13, ca_state#19, sum#20] +Arguments: hashpartitioning(cr_returning_customer_sk#13, ca_state#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [3]: [cr_returning_customer_sk#13, ca_state#19, sum#20] +Keys [2]: [cr_returning_customer_sk#13, ca_state#19] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#15))] + +(29) CometHashAggregate +Input [2]: [ctr_state#21, ctr_total_return#22] +Keys [1]: [ctr_state#21] +Functions [1]: [partial_avg(ctr_total_return#22)] + +(30) CometExchange +Input [3]: [ctr_state#21, sum#23, count#24] +Arguments: hashpartitioning(ctr_state#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(31) CometHashAggregate +Input [3]: [ctr_state#21, sum#23, count#24] +Keys [1]: [ctr_state#21] +Functions [1]: [avg(ctr_total_return#22)] + +(32) CometFilter +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#25) + +(33) CometBroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] + +(34) CometBroadcastHashJoin +Left output [3]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12] +Right output [2]: [(avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_state#11], [ctr_state#21], Inner, (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#25), BuildRight + +(35) CometProject +Input [5]: [ctr_customer_sk#10, ctr_state#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#25, ctr_state#21] +Arguments: [ctr_customer_sk#10, ctr_total_return#12], [ctr_customer_sk#10, ctr_total_return#12] + +(36) CometScan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(37) CometFilter +Input [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Condition : (isnotnull(c_customer_sk#26) AND isnotnull(c_current_addr_sk#28)) + +(38) CometBroadcastExchange +Input [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] + +(39) CometBroadcastHashJoin +Left output [2]: [ctr_customer_sk#10, ctr_total_return#12] +Right output [6]: [c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [ctr_customer_sk#10], [c_customer_sk#26], Inner, BuildRight + +(40) CometProject +Input [8]: [ctr_customer_sk#10, ctr_total_return#12, c_customer_sk#26, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Arguments: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31], [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] + +(41) CometScan parquet spark_catalog.default.customer_address +Output [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(42) CometFilter +Input [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Condition : ((isnotnull(ca_state#39) AND (ca_state#39 = GA)) AND isnotnull(ca_address_sk#32)) + +(43) CometBroadcastExchange +Input [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] + +(44) CometBroadcastHashJoin +Left output [6]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31] +Right output [12]: [ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [c_current_addr_sk#28], [ca_address_sk#32], Inner, BuildRight + +(45) CometProject +Input [18]: [ctr_total_return#12, c_customer_id#27, c_current_addr_sk#28, c_salutation#29, c_first_name#30, c_last_name#31, ca_address_sk#32, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43] +Arguments: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] + +(46) CometTakeOrderedAndProject +Input [16]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#27 ASC NULLS FIRST,c_salutation#29 ASC NULLS FIRST,c_first_name#30 ASC NULLS FIRST,c_last_name#31 ASC NULLS FIRST,ca_street_number#33 ASC NULLS FIRST,ca_street_name#34 ASC NULLS FIRST,ca_street_type#35 ASC NULLS FIRST,ca_suite_number#36 ASC NULLS FIRST,ca_city#37 ASC NULLS FIRST,ca_county#38 ASC NULLS FIRST,ca_state#39 ASC NULLS FIRST,ca_zip#40 ASC NULLS FIRST,ca_country#41 ASC NULLS FIRST,ca_gmt_offset#42 ASC NULLS FIRST,ca_location_type#43 ASC NULLS FIRST,ctr_total_return#12 ASC NULLS FIRST], output=[c_customer_id#27,c_salutation#29,c_first_name#30,c_last_name#31,ca_street_number#33,ca_street_name#34,ca_street_type#35,ca_suite_number#36,ca_city#37,ca_county#38,ca_state#39,ca_zip#40,ca_country#41,ca_gmt_offset#42,ca_location_type#43,ctr_total_return#12]), [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12], 100, [c_customer_id#27 ASC NULLS FIRST, c_salutation#29 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, c_last_name#31 ASC NULLS FIRST, ca_street_number#33 ASC NULLS FIRST, ca_street_name#34 ASC NULLS FIRST, ca_street_type#35 ASC NULLS FIRST, ca_suite_number#36 ASC NULLS FIRST, ca_city#37 ASC NULLS FIRST, ca_county#38 ASC NULLS FIRST, ca_state#39 ASC NULLS FIRST, ca_zip#40 ASC NULLS FIRST, ca_country#41 ASC NULLS FIRST, ca_gmt_offset#42 ASC NULLS FIRST, ca_location_type#43 ASC NULLS FIRST, ctr_total_return#12 ASC NULLS FIRST], [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] + +(47) ColumnarToRow [codegen id : 1] +Input [16]: [c_customer_id#27, c_salutation#29, c_first_name#30, c_last_name#31, ca_street_number#33, ca_street_name#34, ca_street_type#35, ca_suite_number#36, ca_city#37, ca_county#38, ca_state#39, ca_zip#40, ca_country#41, ca_gmt_offset#42, ca_location_type#43, ctr_total_return#12] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..243953ba90 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q81.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + CometProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + CometBroadcastHashJoin [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + CometProject [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometBroadcastHashJoin [ctr_customer_sk,ctr_total_return,c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometProject [ctr_customer_sk,ctr_total_return] + CometBroadcastHashJoin [ctr_customer_sk,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2),ctr_state] + CometFilter [ctr_customer_sk,ctr_state,ctr_total_return] + CometHashAggregate [ctr_customer_sk,ctr_state,ctr_total_return,cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + CometExchange [cr_returning_customer_sk,ca_state] #1 + CometHashAggregate [cr_returning_customer_sk,ca_state,sum,cr_return_amt_inc_tax] + CometProject [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,ca_address_sk,ca_state] + CometProject [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk,d_date_sk] + CometFilter [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_state] #4 + CometFilter [(avg(ctr_total_return) * 1.2),ctr_state] + CometHashAggregate [(avg(ctr_total_return) * 1.2),ctr_state,sum,count,avg(ctr_total_return)] + CometExchange [ctr_state] #5 + CometHashAggregate [ctr_state,sum,count,ctr_total_return] + CometHashAggregate [ctr_state,ctr_total_return,cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + CometExchange [cr_returning_customer_sk,ca_state] #6 + CometHashAggregate [cr_returning_customer_sk,ca_state,sum,cr_return_amt_inc_tax] + CometProject [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,ca_address_sk,ca_state] + CometProject [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk,d_date_sk] + CometFilter [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + ReusedExchange [ca_address_sk,ca_state] #3 + CometBroadcastExchange [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] #7 + CometFilter [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometBroadcastExchange [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] #8 + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/explain.txt new file mode 100644 index 0000000000..fdcea86558 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +* ColumnarToRow (26) ++- CometTakeOrderedAndProject (25) + +- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + +- CometProject (19) + +- CometFilter (18) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (17) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (i_current_price#4 <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(4) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(5) CometFilter +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(6) CometProject +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8], [inv_item_sk#6, inv_date_sk#8] + +(7) CometBroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1], [inv_item_sk#6], Inner, BuildRight + +(9) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(11) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-05-25)) AND (d_date#10 <= 2000-07-24)) AND isnotnull(d_date_sk#9)) + +(12) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(14) CometBroadcastHashJoin +Left output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [inv_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(15) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(16) CometBroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(17) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#11, ss_sold_date_sk#12] + +(18) CometFilter +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) + +(19) CometProject +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#11], [ss_item_sk#11] + +(20) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [1]: [ss_item_sk#11] +Arguments: [i_item_sk#1], [ss_item_sk#11], Inner, BuildLeft + +(21) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] +Arguments: [i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(22) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(23) CometExchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(25) CometTakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#2 ASC NULLS FIRST], output=[i_item_id#2,i_item_desc#3,i_current_price#4]), [i_item_id#2, i_item_desc#3, i_current_price#4], 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b8535baaf2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometExchange [i_item_id,i_item_desc,i_current_price] #1 + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometProject [i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,ss_item_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price] #2 + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk,d_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_item_sk,inv_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometBroadcastExchange [inv_item_sk,inv_date_sk] #3 + CometProject [inv_item_sk,inv_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometProject [ss_item_sk] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4d002c304b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/explain.txt @@ -0,0 +1,150 @@ +== Physical Plan == +* ColumnarToRow (26) ++- CometTakeOrderedAndProject (25) + +- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.item (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometScan parquet spark_catalog.default.inventory (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometScan parquet spark_catalog.default.date_dim (10) + +- CometProject (19) + +- CometFilter (18) + +- CometScan parquet spark_catalog.default.store_sales (17) + + +(1) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), LessThanOrEqual(i_current_price,92.00), In(i_manufact_id, [129,270,423,821]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (i_current_price#4 <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(4) CometScan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#8)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(6) CometProject +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8], [inv_item_sk#6, inv_date_sk#8] + +(7) CometBroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1], [inv_item_sk#6], Inner, BuildRight + +(9) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] + +(10) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-05-25)) AND (d_date#10 <= 2000-07-24)) AND isnotnull(d_date_sk#9)) + +(12) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(14) CometBroadcastHashJoin +Left output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [inv_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(15) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(16) CometBroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(17) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(18) CometFilter +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) + +(19) CometProject +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#11], [ss_item_sk#11] + +(20) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [1]: [ss_item_sk#11] +Arguments: [i_item_sk#1], [ss_item_sk#11], Inner, BuildLeft + +(21) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] +Arguments: [i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(22) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(23) CometExchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(25) CometTakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#2 ASC NULLS FIRST], output=[i_item_id#2,i_item_desc#3,i_current_price#4]), [i_item_id#2, i_item_desc#3, i_current_price#4], 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..df434a754b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q82.native_iceberg_compat/simplified.txt @@ -0,0 +1,28 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometExchange [i_item_id,i_item_desc,i_current_price] #1 + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometProject [i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,ss_item_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price] #2 + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk,d_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_item_sk,inv_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometBroadcastExchange [inv_item_sk,inv_date_sk] #3 + CometProject [inv_item_sk,inv_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometProject [ss_item_sk] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/explain.txt new file mode 100644 index 0000000000..e02d453853 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +* ColumnarToRow (35) ++- CometTakeOrderedAndProject (34) + +- CometProject (33) + +- CometBroadcastHashJoin (32) + :- CometProject (30) + : +- CometBroadcastHashJoin (29) + : :- CometHashAggregate (25) + : : +- CometExchange (24) + : : +- CometHashAggregate (23) + : : +- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (17) + : : +- CometProject (16) + : : +- CometBroadcastHashJoin (15) + : : :- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + : : +- CometBroadcastExchange (14) + : : +- CometProject (13) + : : +- CometFilter (12) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (11) + : +- CometBroadcastExchange (28) + : +- CometHashAggregate (27) + : +- ReusedExchange (26) + +- ReusedExchange (31) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Arguments: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(2) CometFilter +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(6) CometBroadcastHashJoin +Left output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Right output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [sr_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#4, i_item_id#5] +Arguments: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5], [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6, d_date#7] + +(9) CometFilter +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8, d_week_seq#9] + +(11) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date#10, d_week_seq#11] +Arguments: [d_date#10, d_week_seq#11] + +(12) CometFilter +Input [2]: [d_date#10, d_week_seq#11] +Condition : cast(d_date#10 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(13) CometProject +Input [2]: [d_date#10, d_week_seq#11] +Arguments: [d_week_seq#11], [d_week_seq#11] + +(14) CometBroadcastExchange +Input [1]: [d_week_seq#11] +Arguments: [d_week_seq#11] + +(15) CometBroadcastHashJoin +Left output [2]: [d_date#8, d_week_seq#9] +Right output [1]: [d_week_seq#11] +Arguments: [d_week_seq#9], [d_week_seq#11], LeftSemi, BuildRight + +(16) CometProject +Input [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8], [d_date#8] + +(17) CometBroadcastExchange +Input [1]: [d_date#8] +Arguments: [d_date#8] + +(18) CometBroadcastHashJoin +Left output [2]: [d_date_sk#6, d_date#7] +Right output [1]: [d_date#8] +Arguments: [d_date#7], [d_date#8], LeftSemi, BuildRight + +(19) CometProject +Input [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(21) CometBroadcastHashJoin +Left output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] +Right output [1]: [d_date_sk#6] +Arguments: [sr_returned_date_sk#3], [d_date_sk#6], Inner, BuildRight + +(22) CometProject +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5, d_date_sk#6] +Arguments: [sr_return_quantity#2, i_item_id#5], [sr_return_quantity#2, i_item_id#5] + +(23) CometHashAggregate +Input [2]: [sr_return_quantity#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(sr_return_quantity#2)] + +(24) CometExchange +Input [2]: [i_item_id#5, sum#12] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [2]: [i_item_id#5, sum#12] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(sr_return_quantity#2)] + +(26) ReusedExchange [Reuses operator id: 24] +Output [2]: [i_item_id#13, sum#14] + +(27) CometHashAggregate +Input [2]: [i_item_id#13, sum#14] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(cr_return_quantity#15)] + +(28) CometBroadcastExchange +Input [2]: [item_id#16, cr_item_qty#17] +Arguments: [item_id#16, cr_item_qty#17] + +(29) CometBroadcastHashJoin +Left output [2]: [item_id#18, sr_item_qty#19] +Right output [2]: [item_id#16, cr_item_qty#17] +Arguments: [item_id#18], [item_id#16], Inner, BuildRight + +(30) CometProject +Input [4]: [item_id#18, sr_item_qty#19, item_id#16, cr_item_qty#17] +Arguments: [item_id#18, sr_item_qty#19, cr_item_qty#17], [item_id#18, sr_item_qty#19, cr_item_qty#17] + +(31) ReusedExchange [Reuses operator id: 28] +Output [2]: [item_id#20, wr_item_qty#21] + +(32) CometBroadcastHashJoin +Left output [3]: [item_id#18, sr_item_qty#19, cr_item_qty#17] +Right output [2]: [item_id#20, wr_item_qty#21] +Arguments: [item_id#18], [item_id#20], Inner, BuildRight + +(33) CometProject +Input [5]: [item_id#18, sr_item_qty#19, cr_item_qty#17, item_id#20, wr_item_qty#21] +Arguments: [item_id#18, sr_item_qty#19, sr_dev#22, cr_item_qty#17, cr_dev#23, wr_item_qty#21, wr_dev#24, average#25], [item_id#18, sr_item_qty#19, (((cast(sr_item_qty#19 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#19 + cr_item_qty#17) + wr_item_qty#21) as double)))) / 3.0) * 100.0) AS sr_dev#22, cr_item_qty#17, (((cast(cr_item_qty#17 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#19 + cr_item_qty#17) + wr_item_qty#21) as double)))) / 3.0) * 100.0) AS cr_dev#23, wr_item_qty#21, (((cast(wr_item_qty#21 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#19 + cr_item_qty#17) + wr_item_qty#21) as double)))) / 3.0) * 100.0) AS wr_dev#24, (cast(((sr_item_qty#19 + cr_item_qty#17) + wr_item_qty#21) as decimal(20,0)) / 3.0) AS average#25] + +(34) CometTakeOrderedAndProject +Input [8]: [item_id#18, sr_item_qty#19, sr_dev#22, cr_item_qty#17, cr_dev#23, wr_item_qty#21, wr_dev#24, average#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[item_id#18 ASC NULLS FIRST,sr_item_qty#19 ASC NULLS FIRST], output=[item_id#18,sr_item_qty#19,sr_dev#22,cr_item_qty#17,cr_dev#23,wr_item_qty#21,wr_dev#24,average#25]), [item_id#18, sr_item_qty#19, sr_dev#22, cr_item_qty#17, cr_dev#23, wr_item_qty#21, wr_dev#24, average#25], 100, [item_id#18 ASC NULLS FIRST, sr_item_qty#19 ASC NULLS FIRST], [item_id#18, sr_item_qty#19, sr_dev#22, cr_item_qty#17, cr_dev#23, wr_item_qty#21, wr_dev#24, average#25] + +(35) ColumnarToRow [codegen id : 1] +Input [8]: [item_id#18, sr_item_qty#19, sr_dev#22, cr_item_qty#17, cr_dev#23, wr_item_qty#21, wr_dev#24, average#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f8447c7147 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + CometProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + CometBroadcastHashJoin [item_id,sr_item_qty,cr_item_qty,item_id,wr_item_qty] + CometProject [item_id,sr_item_qty,cr_item_qty] + CometBroadcastHashJoin [item_id,sr_item_qty,item_id,cr_item_qty] + CometHashAggregate [item_id,sr_item_qty,i_item_id,sum,sum(sr_return_quantity)] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,sr_return_quantity] + CometProject [sr_return_quantity,i_item_id] + CometBroadcastHashJoin [sr_return_quantity,sr_returned_date_sk,i_item_id,d_date_sk] + CometProject [sr_return_quantity,sr_returned_date_sk,i_item_id] + CometBroadcastHashJoin [sr_item_sk,sr_return_quantity,sr_returned_date_sk,i_item_sk,i_item_id] + CometFilter [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id] #2 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometBroadcastHashJoin [d_date_sk,d_date,d_date] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [d_date] #4 + CometProject [d_date] + CometBroadcastHashJoin [d_date,d_week_seq,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date,d_week_seq] + CometBroadcastExchange [d_week_seq] #5 + CometProject [d_week_seq] + CometFilter [d_date,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date,d_week_seq] + CometBroadcastExchange [item_id,cr_item_qty] #6 + CometHashAggregate [item_id,cr_item_qty,i_item_id,sum,sum(cr_return_quantity)] + ReusedExchange [i_item_id,sum] #1 + ReusedExchange [item_id,wr_item_qty] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..53b2ef4424 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/explain.txt @@ -0,0 +1,311 @@ +== Physical Plan == +* ColumnarToRow (55) ++- CometTakeOrderedAndProject (54) + +- CometProject (53) + +- CometBroadcastHashJoin (52) + :- CometProject (39) + : +- CometBroadcastHashJoin (38) + : :- CometHashAggregate (25) + : : +- CometExchange (24) + : : +- CometHashAggregate (23) + : : +- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_returns (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.item (3) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (17) + : : +- CometProject (16) + : : +- CometBroadcastHashJoin (15) + : : :- CometScan parquet spark_catalog.default.date_dim (10) + : : +- CometBroadcastExchange (14) + : : +- CometProject (13) + : : +- CometFilter (12) + : : +- CometScan parquet spark_catalog.default.date_dim (11) + : +- CometBroadcastExchange (37) + : +- CometHashAggregate (36) + : +- CometExchange (35) + : +- CometHashAggregate (34) + : +- CometProject (33) + : +- CometBroadcastHashJoin (32) + : :- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometFilter (27) + : : : +- CometScan parquet spark_catalog.default.catalog_returns (26) + : : +- ReusedExchange (28) + : +- ReusedExchange (31) + +- CometBroadcastExchange (51) + +- CometHashAggregate (50) + +- CometExchange (49) + +- CometHashAggregate (48) + +- CometProject (47) + +- CometBroadcastHashJoin (46) + :- CometProject (44) + : +- CometBroadcastHashJoin (43) + : :- CometFilter (41) + : : +- CometScan parquet spark_catalog.default.web_returns (40) + : +- ReusedExchange (42) + +- ReusedExchange (45) + + +(1) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(6) CometBroadcastHashJoin +Left output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Right output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [sr_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#4, i_item_id#5] +Arguments: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5], [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_date#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(10) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(11) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date#10, d_week_seq#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(12) CometFilter +Input [2]: [d_date#10, d_week_seq#11] +Condition : cast(d_date#10 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(13) CometProject +Input [2]: [d_date#10, d_week_seq#11] +Arguments: [d_week_seq#11], [d_week_seq#11] + +(14) CometBroadcastExchange +Input [1]: [d_week_seq#11] +Arguments: [d_week_seq#11] + +(15) CometBroadcastHashJoin +Left output [2]: [d_date#8, d_week_seq#9] +Right output [1]: [d_week_seq#11] +Arguments: [d_week_seq#9], [d_week_seq#11], LeftSemi, BuildRight + +(16) CometProject +Input [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8], [d_date#8] + +(17) CometBroadcastExchange +Input [1]: [d_date#8] +Arguments: [d_date#8] + +(18) CometBroadcastHashJoin +Left output [2]: [d_date_sk#6, d_date#7] +Right output [1]: [d_date#8] +Arguments: [d_date#7], [d_date#8], LeftSemi, BuildRight + +(19) CometProject +Input [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(21) CometBroadcastHashJoin +Left output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] +Right output [1]: [d_date_sk#6] +Arguments: [sr_returned_date_sk#3], [d_date_sk#6], Inner, BuildRight + +(22) CometProject +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5, d_date_sk#6] +Arguments: [sr_return_quantity#2, i_item_id#5], [sr_return_quantity#2, i_item_id#5] + +(23) CometHashAggregate +Input [2]: [sr_return_quantity#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(sr_return_quantity#2)] + +(24) CometExchange +Input [2]: [i_item_id#5, sum#12] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [2]: [i_item_id#5, sum#12] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(sr_return_quantity#2)] + +(26) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#15)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(27) CometFilter +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Condition : isnotnull(cr_item_sk#13) + +(28) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#16, i_item_id#17] + +(29) CometBroadcastHashJoin +Left output [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Right output [2]: [i_item_sk#16, i_item_id#17] +Arguments: [cr_item_sk#13], [i_item_sk#16], Inner, BuildRight + +(30) CometProject +Input [5]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15, i_item_sk#16, i_item_id#17] +Arguments: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17], [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17] + +(31) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#18] + +(32) CometBroadcastHashJoin +Left output [3]: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17] +Right output [1]: [d_date_sk#18] +Arguments: [cr_returned_date_sk#15], [d_date_sk#18], Inner, BuildRight + +(33) CometProject +Input [4]: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17, d_date_sk#18] +Arguments: [cr_return_quantity#14, i_item_id#17], [cr_return_quantity#14, i_item_id#17] + +(34) CometHashAggregate +Input [2]: [cr_return_quantity#14, i_item_id#17] +Keys [1]: [i_item_id#17] +Functions [1]: [partial_sum(cr_return_quantity#14)] + +(35) CometExchange +Input [2]: [i_item_id#17, sum#19] +Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(36) CometHashAggregate +Input [2]: [i_item_id#17, sum#19] +Keys [1]: [i_item_id#17] +Functions [1]: [sum(cr_return_quantity#14)] + +(37) CometBroadcastExchange +Input [2]: [item_id#20, cr_item_qty#21] +Arguments: [item_id#20, cr_item_qty#21] + +(38) CometBroadcastHashJoin +Left output [2]: [item_id#22, sr_item_qty#23] +Right output [2]: [item_id#20, cr_item_qty#21] +Arguments: [item_id#22], [item_id#20], Inner, BuildRight + +(39) CometProject +Input [4]: [item_id#22, sr_item_qty#23, item_id#20, cr_item_qty#21] +Arguments: [item_id#22, sr_item_qty#23, cr_item_qty#21], [item_id#22, sr_item_qty#23, cr_item_qty#21] + +(40) CometScan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#26)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(41) CometFilter +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Condition : isnotnull(wr_item_sk#24) + +(42) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#27, i_item_id#28] + +(43) CometBroadcastHashJoin +Left output [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Right output [2]: [i_item_sk#27, i_item_id#28] +Arguments: [wr_item_sk#24], [i_item_sk#27], Inner, BuildRight + +(44) CometProject +Input [5]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26, i_item_sk#27, i_item_id#28] +Arguments: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28], [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28] + +(45) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#29] + +(46) CometBroadcastHashJoin +Left output [3]: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28] +Right output [1]: [d_date_sk#29] +Arguments: [wr_returned_date_sk#26], [d_date_sk#29], Inner, BuildRight + +(47) CometProject +Input [4]: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28, d_date_sk#29] +Arguments: [wr_return_quantity#25, i_item_id#28], [wr_return_quantity#25, i_item_id#28] + +(48) CometHashAggregate +Input [2]: [wr_return_quantity#25, i_item_id#28] +Keys [1]: [i_item_id#28] +Functions [1]: [partial_sum(wr_return_quantity#25)] + +(49) CometExchange +Input [2]: [i_item_id#28, sum#30] +Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(50) CometHashAggregate +Input [2]: [i_item_id#28, sum#30] +Keys [1]: [i_item_id#28] +Functions [1]: [sum(wr_return_quantity#25)] + +(51) CometBroadcastExchange +Input [2]: [item_id#31, wr_item_qty#32] +Arguments: [item_id#31, wr_item_qty#32] + +(52) CometBroadcastHashJoin +Left output [3]: [item_id#22, sr_item_qty#23, cr_item_qty#21] +Right output [2]: [item_id#31, wr_item_qty#32] +Arguments: [item_id#22], [item_id#31], Inner, BuildRight + +(53) CometProject +Input [5]: [item_id#22, sr_item_qty#23, cr_item_qty#21, item_id#31, wr_item_qty#32] +Arguments: [item_id#22, sr_item_qty#23, sr_dev#33, cr_item_qty#21, cr_dev#34, wr_item_qty#32, wr_dev#35, average#36], [item_id#22, sr_item_qty#23, (((cast(sr_item_qty#23 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#23 + cr_item_qty#21) + wr_item_qty#32) as double)))) / 3.0) * 100.0) AS sr_dev#33, cr_item_qty#21, (((cast(cr_item_qty#21 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#23 + cr_item_qty#21) + wr_item_qty#32) as double)))) / 3.0) * 100.0) AS cr_dev#34, wr_item_qty#32, (((cast(wr_item_qty#32 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#23 + cr_item_qty#21) + wr_item_qty#32) as double)))) / 3.0) * 100.0) AS wr_dev#35, (cast(((sr_item_qty#23 + cr_item_qty#21) + wr_item_qty#32) as decimal(20,0)) / 3.0) AS average#36] + +(54) CometTakeOrderedAndProject +Input [8]: [item_id#22, sr_item_qty#23, sr_dev#33, cr_item_qty#21, cr_dev#34, wr_item_qty#32, wr_dev#35, average#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[item_id#22 ASC NULLS FIRST,sr_item_qty#23 ASC NULLS FIRST], output=[item_id#22,sr_item_qty#23,sr_dev#33,cr_item_qty#21,cr_dev#34,wr_item_qty#32,wr_dev#35,average#36]), [item_id#22, sr_item_qty#23, sr_dev#33, cr_item_qty#21, cr_dev#34, wr_item_qty#32, wr_dev#35, average#36], 100, [item_id#22 ASC NULLS FIRST, sr_item_qty#23 ASC NULLS FIRST], [item_id#22, sr_item_qty#23, sr_dev#33, cr_item_qty#21, cr_dev#34, wr_item_qty#32, wr_dev#35, average#36] + +(55) ColumnarToRow [codegen id : 1] +Input [8]: [item_id#22, sr_item_qty#23, sr_dev#33, cr_item_qty#21, cr_dev#34, wr_item_qty#32, wr_dev#35, average#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4b04d604b3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q83.native_iceberg_compat/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + CometProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + CometBroadcastHashJoin [item_id,sr_item_qty,cr_item_qty,item_id,wr_item_qty] + CometProject [item_id,sr_item_qty,cr_item_qty] + CometBroadcastHashJoin [item_id,sr_item_qty,item_id,cr_item_qty] + CometHashAggregate [item_id,sr_item_qty,i_item_id,sum,sum(sr_return_quantity)] + CometExchange [i_item_id] #1 + CometHashAggregate [i_item_id,sum,sr_return_quantity] + CometProject [sr_return_quantity,i_item_id] + CometBroadcastHashJoin [sr_return_quantity,sr_returned_date_sk,i_item_id,d_date_sk] + CometProject [sr_return_quantity,sr_returned_date_sk,i_item_id] + CometBroadcastHashJoin [sr_item_sk,sr_return_quantity,sr_returned_date_sk,i_item_sk,i_item_id] + CometFilter [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id] #2 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometBroadcastHashJoin [d_date_sk,d_date,d_date] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [d_date] #4 + CometProject [d_date] + CometBroadcastHashJoin [d_date,d_week_seq,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + CometBroadcastExchange [d_week_seq] #5 + CometProject [d_week_seq] + CometFilter [d_date,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + CometBroadcastExchange [item_id,cr_item_qty] #6 + CometHashAggregate [item_id,cr_item_qty,i_item_id,sum,sum(cr_return_quantity)] + CometExchange [i_item_id] #7 + CometHashAggregate [i_item_id,sum,cr_return_quantity] + CometProject [cr_return_quantity,i_item_id] + CometBroadcastHashJoin [cr_return_quantity,cr_returned_date_sk,i_item_id,d_date_sk] + CometProject [cr_return_quantity,cr_returned_date_sk,i_item_id] + CometBroadcastHashJoin [cr_item_sk,cr_return_quantity,cr_returned_date_sk,i_item_sk,i_item_id] + CometFilter [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [item_id,wr_item_qty] #8 + CometHashAggregate [item_id,wr_item_qty,i_item_id,sum,sum(wr_return_quantity)] + CometExchange [i_item_id] #9 + CometHashAggregate [i_item_id,sum,wr_return_quantity] + CometProject [wr_return_quantity,i_item_id] + CometBroadcastHashJoin [wr_return_quantity,wr_returned_date_sk,i_item_id,d_date_sk] + CometProject [wr_return_quantity,wr_returned_date_sk,i_item_id] + CometBroadcastHashJoin [wr_item_sk,wr_return_quantity,wr_returned_date_sk,i_item_sk,i_item_id] + CometFilter [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/explain.txt new file mode 100644 index 0000000000..37a2d3f738 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/explain.txt @@ -0,0 +1,167 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * ColumnarToRow (30) + +- CometBroadcastHashJoin (29) + :- CometBroadcastExchange (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (9) + : : +- CometBroadcastExchange (16) + : : +- CometFilter (15) + : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (14) + : +- CometBroadcastExchange (22) + : +- CometProject (21) + : +- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`income_band` (19) + +- CometProject (28) + +- CometFilter (27) + +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Arguments: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(2) CometFilter +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_city#8] +Arguments: [ca_address_sk#7, ca_city#8] + +(4) CometFilter +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(5) CometProject +Input [2]: [ca_address_sk#7, ca_city#8] +Arguments: [ca_address_sk#7], [ca_address_sk#7] + +(6) CometBroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: [ca_address_sk#7] + +(7) CometBroadcastHashJoin +Left output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Right output [1]: [ca_address_sk#7] +Arguments: [c_current_addr_sk#4], [ca_address_sk#7], Inner, BuildRight + +(8) CometProject +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] +Arguments: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6], [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(10) CometFilter +Input [1]: [cd_demo_sk#9] +Condition : isnotnull(cd_demo_sk#9) + +(11) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(12) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Right output [1]: [cd_demo_sk#9] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(13) CometProject +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9], [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(14) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [hd_demo_sk#10, hd_income_band_sk#11] + +(15) CometFilter +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) + +(16) CometBroadcastExchange +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [hd_demo_sk#10, hd_income_band_sk#11] + +(17) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Right output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [c_current_hdemo_sk#3], [hd_demo_sk#10], Inner, BuildRight + +(18) CometProject +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11], [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] + +(19) CometNativeScan: `spark_catalog`.`default`.`income_band` +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Arguments: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] + +(20) CometFilter +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) + +(21) CometProject +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Arguments: [ib_income_band_sk#12], [ib_income_band_sk#12] + +(22) CometBroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: [ib_income_band_sk#12] + +(23) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] +Right output [1]: [ib_income_band_sk#12] +Arguments: [hd_income_band_sk#11], [ib_income_band_sk#12], Inner, BuildRight + +(24) CometProject +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11, ib_income_band_sk#12] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9], [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(25) CometBroadcastExchange +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Arguments: [sr_cdemo_sk#15, sr_returned_date_sk#16] + +(27) CometFilter +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) + +(28) CometProject +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Arguments: [sr_cdemo_sk#15], [sr_cdemo_sk#15] + +(29) CometBroadcastHashJoin +Left output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Right output [1]: [sr_cdemo_sk#15] +Arguments: [cd_demo_sk#9], [sr_cdemo_sk#15], Inner, BuildLeft + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(31) Project [codegen id : 1] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(32) TakeOrderedAndProject +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6fd5331780 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (1) + Project [c_customer_id,c_last_name,c_first_name] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,cd_demo_sk,sr_cdemo_sk] + CometBroadcastExchange [c_customer_id,c_first_name,c_last_name,cd_demo_sk] #1 + CometProject [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk,ib_income_band_sk] + CometProject [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] + CometBroadcastHashJoin [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk,hd_demo_sk,hd_income_band_sk] + CometProject [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + CometBroadcastHashJoin [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + CometProject [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk] + CometFilter [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastExchange [ca_address_sk] #2 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] + CometBroadcastExchange [cd_demo_sk] #3 + CometFilter [cd_demo_sk] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #4 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_income_band_sk] + CometBroadcastExchange [ib_income_band_sk] #5 + CometProject [ib_income_band_sk] + CometFilter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + CometNativeScan: `spark_catalog`.`default`.`income_band` [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + CometProject [sr_cdemo_sk] + CometFilter [sr_cdemo_sk,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_cdemo_sk,sr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..572fd7a66f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/explain.txt @@ -0,0 +1,185 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * ColumnarToRow (30) + +- CometBroadcastHashJoin (29) + :- CometBroadcastExchange (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.customer_address (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.customer_demographics (9) + : : +- CometBroadcastExchange (16) + : : +- CometFilter (15) + : : +- CometScan parquet spark_catalog.default.household_demographics (14) + : +- CometBroadcastExchange (22) + : +- CometProject (21) + : +- CometFilter (20) + : +- CometScan parquet spark_catalog.default.income_band (19) + +- CometProject (28) + +- CometFilter (27) + +- CometScan parquet spark_catalog.default.store_returns (26) + + +(1) CometScan parquet spark_catalog.default.customer +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(3) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_city#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(5) CometProject +Input [2]: [ca_address_sk#7, ca_city#8] +Arguments: [ca_address_sk#7], [ca_address_sk#7] + +(6) CometBroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: [ca_address_sk#7] + +(7) CometBroadcastHashJoin +Left output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Right output [1]: [ca_address_sk#7] +Arguments: [c_current_addr_sk#4], [ca_address_sk#7], Inner, BuildRight + +(8) CometProject +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] +Arguments: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6], [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] + +(9) CometScan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(10) CometFilter +Input [1]: [cd_demo_sk#9] +Condition : isnotnull(cd_demo_sk#9) + +(11) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(12) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Right output [1]: [cd_demo_sk#9] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(13) CometProject +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9], [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(14) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) + +(16) CometBroadcastExchange +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [hd_demo_sk#10, hd_income_band_sk#11] + +(17) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Right output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [c_current_hdemo_sk#3], [hd_demo_sk#10], Inner, BuildRight + +(18) CometProject +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11], [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] + +(19) CometScan parquet spark_catalog.default.income_band +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) + +(21) CometProject +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Arguments: [ib_income_band_sk#12], [ib_income_band_sk#12] + +(22) CometBroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: [ib_income_band_sk#12] + +(23) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] +Right output [1]: [ib_income_band_sk#12] +Arguments: [hd_income_band_sk#11], [ib_income_band_sk#12], Inner, BuildRight + +(24) CometProject +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11, ib_income_band_sk#12] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9], [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(25) CometBroadcastExchange +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(26) CometScan parquet spark_catalog.default.store_returns +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_cdemo_sk)] +ReadSchema: struct + +(27) CometFilter +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) + +(28) CometProject +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Arguments: [sr_cdemo_sk#15], [sr_cdemo_sk#15] + +(29) CometBroadcastHashJoin +Left output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Right output [1]: [sr_cdemo_sk#15] +Arguments: [cd_demo_sk#9], [sr_cdemo_sk#15], Inner, BuildLeft + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(31) Project [codegen id : 1] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(32) TakeOrderedAndProject +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..646285a082 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q84.native_iceberg_compat/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (1) + Project [c_customer_id,c_last_name,c_first_name] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,cd_demo_sk,sr_cdemo_sk] + CometBroadcastExchange [c_customer_id,c_first_name,c_last_name,cd_demo_sk] #1 + CometProject [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk,ib_income_band_sk] + CometProject [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] + CometBroadcastHashJoin [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk,hd_demo_sk,hd_income_band_sk] + CometProject [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + CometBroadcastHashJoin [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + CometProject [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk] + CometFilter [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastExchange [ca_address_sk] #2 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_city] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + CometBroadcastExchange [cd_demo_sk] #3 + CometFilter [cd_demo_sk] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #4 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + CometBroadcastExchange [ib_income_band_sk] #5 + CometProject [ib_income_band_sk] + CometFilter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + CometScan parquet spark_catalog.default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + CometProject [sr_cdemo_sk] + CometFilter [sr_cdemo_sk,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_cdemo_sk,sr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/explain.txt new file mode 100644 index 0000000000..5a77d6f24d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/explain.txt @@ -0,0 +1,236 @@ +== Physical Plan == +* ColumnarToRow (45) ++- CometTakeOrderedAndProject (44) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (29) + : : +- CometBroadcastHashJoin (28) + : : :- CometProject (23) + : : : +- CometBroadcastHashJoin (22) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (13) + : : : : : +- CometBroadcastHashJoin (12) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometBroadcastExchange (3) + : : : : : : : +- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : : : +- CometProject (6) + : : : : : : +- CometFilter (5) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (4) + : : : : : +- CometBroadcastExchange (11) + : : : : : +- CometFilter (10) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_page` (9) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometFilter (15) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (14) + : : : +- CometBroadcastExchange (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (19) + : : +- CometBroadcastExchange (27) + : : +- CometProject (26) + : : +- CometFilter (25) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + : +- CometBroadcastExchange (33) + : +- CometProject (32) + : +- CometFilter (31) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (30) + +- CometBroadcastExchange (38) + +- CometFilter (37) + +- CometNativeScan: `spark_catalog`.`default`.`reason` (36) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((((isnotnull(ws_item_sk#1) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_web_page_sk#2)) AND ((((ws_sales_price#5 >= 100.00) AND (ws_sales_price#5 <= 150.00)) OR ((ws_sales_price#5 >= 50.00) AND (ws_sales_price#5 <= 100.00))) OR ((ws_sales_price#5 >= 150.00) AND (ws_sales_price#5 <= 200.00)))) AND ((((ws_net_profit#6 >= 100.00) AND (ws_net_profit#6 <= 200.00)) OR ((ws_net_profit#6 >= 150.00) AND (ws_net_profit#6 <= 300.00))) OR ((ws_net_profit#6 >= 50.00) AND (ws_net_profit#6 <= 250.00)))) + +(3) CometBroadcastExchange +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Arguments: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] + +(5) CometFilter +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(6) CometProject +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Arguments: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15], [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(7) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Right output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: [ws_item_sk#1, ws_order_number#3], [wr_item_sk#8, wr_order_number#13], Inner, BuildLeft + +(8) CometProject +Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(9) CometNativeScan: `spark_catalog`.`default`.`web_page` +Output [1]: [wp_web_page_sk#17] +Arguments: [wp_web_page_sk#17] + +(10) CometFilter +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(11) CometBroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: [wp_web_page_sk#17] + +(12) CometBroadcastHashJoin +Left output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [1]: [wp_web_page_sk#17] +Arguments: [ws_web_page_sk#2], [wp_web_page_sk#17], Inner, BuildRight + +(13) CometProject +Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] +Arguments: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(14) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(15) CometFilter +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : (((isnotnull(cd_demo_sk#18) AND isnotnull(cd_marital_status#19)) AND isnotnull(cd_education_status#20)) AND ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) OR ((cd_marital_status#19 = S) AND (cd_education_status#20 = College ))) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )))) + +(16) CometBroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(17) CometBroadcastHashJoin +Left output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [wr_refunded_cdemo_sk#9], [cd_demo_sk#18], Inner, ((((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#19 = S) AND (cd_education_status#20 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))), BuildRight + +(18) CometProject +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20], [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] + +(19) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(20) CometFilter +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Condition : ((isnotnull(cd_demo_sk#21) AND isnotnull(cd_marital_status#22)) AND isnotnull(cd_education_status#23)) + +(21) CometBroadcastExchange +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(22) CometBroadcastHashJoin +Left output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] +Right output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [wr_returning_cdemo_sk#11, cd_marital_status#19, cd_education_status#20], [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23], Inner, BuildRight + +(23) CometProject +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20, cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Arguments: [ca_address_sk#24, ca_state#25, ca_country#26] + +(25) CometFilter +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Condition : (((isnotnull(ca_country#26) AND (ca_country#26 = United States)) AND isnotnull(ca_address_sk#24)) AND ((ca_state#25 IN (IN,OH,NJ) OR ca_state#25 IN (WI,CT,KY)) OR ca_state#25 IN (LA,IA,AR))) + +(26) CometProject +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Arguments: [ca_address_sk#24, ca_state#25], [ca_address_sk#24, ca_state#25] + +(27) CometBroadcastExchange +Input [2]: [ca_address_sk#24, ca_state#25] +Arguments: [ca_address_sk#24, ca_state#25] + +(28) CometBroadcastHashJoin +Left output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [2]: [ca_address_sk#24, ca_state#25] +Arguments: [wr_refunded_addr_sk#10], [ca_address_sk#24], Inner, ((((ca_state#25 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#25 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#25 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))), BuildRight + +(29) CometProject +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#24, ca_state#25] +Arguments: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(30) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#27, d_year#28] +Arguments: [d_date_sk#27, d_year#28] + +(31) CometFilter +Input [2]: [d_date_sk#27, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(32) CometProject +Input [2]: [d_date_sk#27, d_year#28] +Arguments: [d_date_sk#27], [d_date_sk#27] + +(33) CometBroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: [d_date_sk#27] + +(34) CometBroadcastHashJoin +Left output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [1]: [d_date_sk#27] +Arguments: [ws_sold_date_sk#7], [d_date_sk#27], Inner, BuildRight + +(35) CometProject +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#27] +Arguments: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(36) CometNativeScan: `spark_catalog`.`default`.`reason` +Output [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [r_reason_sk#29, r_reason_desc#30] + +(37) CometFilter +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) + +(38) CometBroadcastExchange +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [r_reason_sk#29, r_reason_desc#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [wr_reason_sk#12], [r_reason_sk#29], Inner, BuildRight + +(40) CometProject +Input [6]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#29, r_reason_desc#30] +Arguments: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30], [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] + +(41) CometHashAggregate +Input [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] + +(42) CometExchange +Input [7]: [r_reason_desc#30, sum#31, count#32, sum#33, count#34, sum#35, count#36] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(43) CometHashAggregate +Input [7]: [r_reason_desc#30, sum#31, count#32, sum#33, count#34, sum#35, count#36] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] + +(44) CometTakeOrderedAndProject +Input [4]: [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#37 ASC NULLS FIRST,avg(ws_quantity)#38 ASC NULLS FIRST,avg(wr_refunded_cash)#39 ASC NULLS FIRST,avg(wr_fee)#40 ASC NULLS FIRST], output=[substr(r_reason_desc, 1, 20)#37,avg(ws_quantity)#38,avg(wr_refunded_cash)#39,avg(wr_fee)#40]), [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40], 100, [substr(r_reason_desc, 1, 20)#37 ASC NULLS FIRST, avg(ws_quantity)#38 ASC NULLS FIRST, avg(wr_refunded_cash)#39 ASC NULLS FIRST, avg(wr_fee)#40 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40] + +(45) ColumnarToRow [codegen id : 1] +Input [4]: [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/simplified.txt new file mode 100644 index 0000000000..905125e838 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_datafusion/simplified.txt @@ -0,0 +1,47 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] + CometHashAggregate [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),r_reason_desc,sum,count,sum,count,sum,count,avg(ws_quantity),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee))] + CometExchange [r_reason_desc] #1 + CometHashAggregate [r_reason_desc,sum,count,sum,count,sum,count,ws_quantity,wr_refunded_cash,wr_fee] + CometProject [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + CometBroadcastHashJoin [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash,r_reason_sk,r_reason_desc] + CometProject [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash,d_date_sk] + CometProject [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash,ca_address_sk,ca_state] + CometProject [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,wp_web_page_sk] + CometProject [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + CometBroadcastExchange [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] #2 + CometFilter [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + CometFilter [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + CometBroadcastExchange [wp_web_page_sk] #3 + CometFilter [wp_web_page_sk] + CometNativeScan: `spark_catalog`.`default`.`web_page` [wp_web_page_sk] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #4 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #5 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [ca_address_sk,ca_state] #6 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #7 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [r_reason_sk,r_reason_desc] #8 + CometFilter [r_reason_sk,r_reason_desc] + CometNativeScan: `spark_catalog`.`default`.`reason` [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..647cefe519 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/explain.txt @@ -0,0 +1,261 @@ +== Physical Plan == +* ColumnarToRow (45) ++- CometTakeOrderedAndProject (44) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (29) + : : +- CometBroadcastHashJoin (28) + : : :- CometProject (23) + : : : +- CometBroadcastHashJoin (22) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (13) + : : : : : +- CometBroadcastHashJoin (12) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometBroadcastExchange (3) + : : : : : : : +- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : : : : +- CometProject (6) + : : : : : : +- CometFilter (5) + : : : : : : +- CometScan parquet spark_catalog.default.web_returns (4) + : : : : : +- CometBroadcastExchange (11) + : : : : : +- CometFilter (10) + : : : : : +- CometScan parquet spark_catalog.default.web_page (9) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometFilter (15) + : : : : +- CometScan parquet spark_catalog.default.customer_demographics (14) + : : : +- CometBroadcastExchange (21) + : : : +- CometFilter (20) + : : : +- CometScan parquet spark_catalog.default.customer_demographics (19) + : : +- CometBroadcastExchange (27) + : : +- CometProject (26) + : : +- CometFilter (25) + : : +- CometScan parquet spark_catalog.default.customer_address (24) + : +- CometBroadcastExchange (33) + : +- CometProject (32) + : +- CometFilter (31) + : +- CometScan parquet spark_catalog.default.date_dim (30) + +- CometBroadcastExchange (38) + +- CometFilter (37) + +- CometScan parquet spark_catalog.default.reason (36) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#7)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((((isnotnull(ws_item_sk#1) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_web_page_sk#2)) AND ((((ws_sales_price#5 >= 100.00) AND (ws_sales_price#5 <= 150.00)) OR ((ws_sales_price#5 >= 50.00) AND (ws_sales_price#5 <= 100.00))) OR ((ws_sales_price#5 >= 150.00) AND (ws_sales_price#5 <= 200.00)))) AND ((((ws_net_profit#6 >= 100.00) AND (ws_net_profit#6 <= 200.00)) OR ((ws_net_profit#6 >= 150.00) AND (ws_net_profit#6 <= 300.00))) OR ((ws_net_profit#6 >= 50.00) AND (ws_net_profit#6 <= 250.00)))) + +(3) CometBroadcastExchange +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(4) CometScan parquet spark_catalog.default.web_returns +Output [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] +ReadSchema: struct + +(5) CometFilter +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(6) CometProject +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Arguments: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15], [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(7) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Right output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: [ws_item_sk#1, ws_order_number#3], [wr_item_sk#8, wr_order_number#13], Inner, BuildLeft + +(8) CometProject +Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(9) CometScan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(10) CometFilter +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(11) CometBroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: [wp_web_page_sk#17] + +(12) CometBroadcastHashJoin +Left output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [1]: [wp_web_page_sk#17] +Arguments: [ws_web_page_sk#2], [wp_web_page_sk#17], Inner, BuildRight + +(13) CometProject +Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] +Arguments: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(14) CometScan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] +ReadSchema: struct + +(15) CometFilter +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : (((isnotnull(cd_demo_sk#18) AND isnotnull(cd_marital_status#19)) AND isnotnull(cd_education_status#20)) AND ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) OR ((cd_marital_status#19 = S) AND (cd_education_status#20 = College ))) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )))) + +(16) CometBroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(17) CometBroadcastHashJoin +Left output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [wr_refunded_cdemo_sk#9], [cd_demo_sk#18], Inner, ((((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#19 = S) AND (cd_education_status#20 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))), BuildRight + +(18) CometProject +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20], [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] + +(19) CometScan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Condition : ((isnotnull(cd_demo_sk#21) AND isnotnull(cd_marital_status#22)) AND isnotnull(cd_education_status#23)) + +(21) CometBroadcastExchange +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(22) CometBroadcastHashJoin +Left output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] +Right output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [wr_returning_cdemo_sk#11, cd_marital_status#19, cd_education_status#20], [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23], Inner, BuildRight + +(23) CometProject +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20, cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(24) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,NJ,OH]),In(ca_state, [CT,KY,WI])),In(ca_state, [AR,IA,LA]))] +ReadSchema: struct + +(25) CometFilter +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Condition : (((isnotnull(ca_country#26) AND (ca_country#26 = United States)) AND isnotnull(ca_address_sk#24)) AND ((ca_state#25 IN (IN,OH,NJ) OR ca_state#25 IN (WI,CT,KY)) OR ca_state#25 IN (LA,IA,AR))) + +(26) CometProject +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Arguments: [ca_address_sk#24, ca_state#25], [ca_address_sk#24, ca_state#25] + +(27) CometBroadcastExchange +Input [2]: [ca_address_sk#24, ca_state#25] +Arguments: [ca_address_sk#24, ca_state#25] + +(28) CometBroadcastHashJoin +Left output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [2]: [ca_address_sk#24, ca_state#25] +Arguments: [wr_refunded_addr_sk#10], [ca_address_sk#24], Inner, ((((ca_state#25 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#25 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#25 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))), BuildRight + +(29) CometProject +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#24, ca_state#25] +Arguments: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(30) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_year#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) CometFilter +Input [2]: [d_date_sk#27, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(32) CometProject +Input [2]: [d_date_sk#27, d_year#28] +Arguments: [d_date_sk#27], [d_date_sk#27] + +(33) CometBroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: [d_date_sk#27] + +(34) CometBroadcastHashJoin +Left output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [1]: [d_date_sk#27] +Arguments: [ws_sold_date_sk#7], [d_date_sk#27], Inner, BuildRight + +(35) CometProject +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#27] +Arguments: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(36) CometScan parquet spark_catalog.default.reason +Output [2]: [r_reason_sk#29, r_reason_desc#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk)] +ReadSchema: struct + +(37) CometFilter +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) + +(38) CometBroadcastExchange +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [r_reason_sk#29, r_reason_desc#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [wr_reason_sk#12], [r_reason_sk#29], Inner, BuildRight + +(40) CometProject +Input [6]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#29, r_reason_desc#30] +Arguments: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30], [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] + +(41) CometHashAggregate +Input [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] + +(42) CometExchange +Input [7]: [r_reason_desc#30, sum#31, count#32, sum#33, count#34, sum#35, count#36] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(43) CometHashAggregate +Input [7]: [r_reason_desc#30, sum#31, count#32, sum#33, count#34, sum#35, count#36] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] + +(44) CometTakeOrderedAndProject +Input [4]: [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[substr(r_reason_desc, 1, 20)#37 ASC NULLS FIRST,avg(ws_quantity)#38 ASC NULLS FIRST,avg(wr_refunded_cash)#39 ASC NULLS FIRST,avg(wr_fee)#40 ASC NULLS FIRST], output=[substr(r_reason_desc, 1, 20)#37,avg(ws_quantity)#38,avg(wr_refunded_cash)#39,avg(wr_fee)#40]), [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40], 100, [substr(r_reason_desc, 1, 20)#37 ASC NULLS FIRST, avg(ws_quantity)#38 ASC NULLS FIRST, avg(wr_refunded_cash)#39 ASC NULLS FIRST, avg(wr_fee)#40 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40] + +(45) ColumnarToRow [codegen id : 1] +Input [4]: [substr(r_reason_desc, 1, 20)#37, avg(ws_quantity)#38, avg(wr_refunded_cash)#39, avg(wr_fee)#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..52807f3247 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q85.native_iceberg_compat/simplified.txt @@ -0,0 +1,47 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] + CometHashAggregate [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),r_reason_desc,sum,count,sum,count,sum,count,avg(ws_quantity),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee))] + CometExchange [r_reason_desc] #1 + CometHashAggregate [r_reason_desc,sum,count,sum,count,sum,count,ws_quantity,wr_refunded_cash,wr_fee] + CometProject [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + CometBroadcastHashJoin [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash,r_reason_sk,r_reason_desc] + CometProject [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash,d_date_sk] + CometProject [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash,ca_address_sk,ca_state] + CometProject [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,wp_web_page_sk] + CometProject [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + CometBroadcastExchange [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] #2 + CometFilter [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + CometFilter [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + CometBroadcastExchange [wp_web_page_sk] #3 + CometFilter [wp_web_page_sk] + CometScan parquet spark_catalog.default.web_page [wp_web_page_sk] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #4 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #5 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [ca_address_sk,ca_state] #6 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #7 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [r_reason_sk,r_reason_desc] #8 + CometFilter [r_reason_sk,r_reason_desc] + CometScan parquet spark_catalog.default.reason [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/explain.txt new file mode 100644 index 0000000000..22e2a3e664 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/explain.txt @@ -0,0 +1,121 @@ +== Physical Plan == +TakeOrderedAndProject (23) ++- * Project (22) + +- Window (21) + +- * ColumnarToRow (20) + +- CometSort (19) + +- CometExchange (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometExpand (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + +- CometBroadcastExchange (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] +Arguments: [ws_item_sk#1, ws_net_paid#2], [ws_item_sk#1, ws_net_paid#2] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(10) CometFilter +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#1, ws_net_paid#2] +Right output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_net_paid#2, i_category#8, i_class#7], [ws_net_paid#2, i_category#8, i_class#7] + +(14) CometExpand +Input [3]: [ws_net_paid#2, i_category#8, i_class#7] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] + +(15) CometHashAggregate +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] + +(16) CometExchange +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#12] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#12] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] + +(18) CometExchange +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] +Arguments: hashpartitioning(_w1#16, _w2#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(19) CometSort +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] +Arguments: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17], [_w1#16 ASC NULLS FIRST, _w2#17 ASC NULLS FIRST, _w0#15 DESC NULLS LAST] + +(20) ColumnarToRow [codegen id : 1] +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] + +(21) Window +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] +Arguments: [rank(_w0#15) windowspecdefinition(_w1#16, _w2#17, _w0#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#18], [_w1#16, _w2#17], [_w0#15 DESC NULLS LAST] + +(22) Project [codegen id : 2] +Output [5]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, rank_within_parent#18] +Input [8]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17, rank_within_parent#18] + +(23) TakeOrderedAndProject +Input [5]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, rank_within_parent#18] +Arguments: 100, [lochierarchy#14 DESC NULLS LAST, CASE WHEN (lochierarchy#14 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#18 ASC NULLS FIRST], [total_sum#13, i_category#9, i_class#10, lochierarchy#14, rank_within_parent#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/simplified.txt new file mode 100644 index 0000000000..777ca4bdfe --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_datafusion/simplified.txt @@ -0,0 +1,27 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (2) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [total_sum,i_category,i_class,lochierarchy,_w0,_w1,_w2] + CometExchange [_w1,_w2] #1 + CometHashAggregate [total_sum,i_category,i_class,lochierarchy,_w0,_w1,_w2,spark_grouping_id,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [i_category,i_class,spark_grouping_id] #2 + CometHashAggregate [i_category,i_class,spark_grouping_id,sum,ws_net_paid] + CometExpand [i_category,i_class] [ws_net_paid,i_category,i_class,spark_grouping_id] + CometProject [ws_net_paid,i_category,i_class] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,i_item_sk,i_class,i_category] + CometProject [ws_item_sk,ws_net_paid] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_class,i_category] #4 + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..db2024d3b0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/explain.txt @@ -0,0 +1,131 @@ +== Physical Plan == +TakeOrderedAndProject (23) ++- * Project (22) + +- Window (21) + +- * ColumnarToRow (20) + +- CometSort (19) + +- CometExchange (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometExpand (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.web_sales (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.date_dim (3) + +- CometBroadcastExchange (11) + +- CometFilter (10) + +- CometScan parquet spark_catalog.default.item (9) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] +Arguments: [ws_item_sk#1, ws_net_paid#2], [ws_item_sk#1, ws_net_paid#2] + +(9) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#1, ws_net_paid#2] +Right output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_net_paid#2, i_category#8, i_class#7], [ws_net_paid#2, i_category#8, i_class#7] + +(14) CometExpand +Input [3]: [ws_net_paid#2, i_category#8, i_class#7] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] + +(15) CometHashAggregate +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] + +(16) CometExchange +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#12] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#12] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] + +(18) CometExchange +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] +Arguments: hashpartitioning(_w1#16, _w2#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(19) CometSort +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] +Arguments: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17], [_w1#16 ASC NULLS FIRST, _w2#17 ASC NULLS FIRST, _w0#15 DESC NULLS LAST] + +(20) ColumnarToRow [codegen id : 1] +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] + +(21) Window +Input [7]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17] +Arguments: [rank(_w0#15) windowspecdefinition(_w1#16, _w2#17, _w0#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#18], [_w1#16, _w2#17], [_w0#15 DESC NULLS LAST] + +(22) Project [codegen id : 2] +Output [5]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, rank_within_parent#18] +Input [8]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, _w0#15, _w1#16, _w2#17, rank_within_parent#18] + +(23) TakeOrderedAndProject +Input [5]: [total_sum#13, i_category#9, i_class#10, lochierarchy#14, rank_within_parent#18] +Arguments: 100, [lochierarchy#14 DESC NULLS LAST, CASE WHEN (lochierarchy#14 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#18 ASC NULLS FIRST], [total_sum#13, i_category#9, i_class#10, lochierarchy#14, rank_within_parent#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..0c25350590 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q86.native_iceberg_compat/simplified.txt @@ -0,0 +1,27 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (2) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [total_sum,i_category,i_class,lochierarchy,_w0,_w1,_w2] + CometExchange [_w1,_w2] #1 + CometHashAggregate [total_sum,i_category,i_class,lochierarchy,_w0,_w1,_w2,spark_grouping_id,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [i_category,i_class,spark_grouping_id] #2 + CometHashAggregate [i_category,i_class,spark_grouping_id,sum,ws_net_paid] + CometExpand [i_category,i_class] [ws_net_paid,i_category,i_class,spark_grouping_id] + CometProject [ws_net_paid,i_category,i_class] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,i_item_sk,i_class,i_category] + CometProject [ws_item_sk,ws_net_paid] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_class,i_category] #4 + CometFilter [i_item_sk,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/explain.txt new file mode 100644 index 0000000000..3ff75a4a3b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/explain.txt @@ -0,0 +1,154 @@ +== Physical Plan == +* HashAggregate (28) ++- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin LeftAnti BuildRight (24) + :- * BroadcastHashJoin LeftAnti BuildRight (22) + : :- * ColumnarToRow (17) + : : +- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : +- BroadcastExchange (21) + : +- * ColumnarToRow (20) + : +- CometHashAggregate (19) + : +- ReusedExchange (18) + +- ReusedExchange (23) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Arguments: [ss_customer_sk#1, ss_sold_date_sk#2] + +(2) CometFilter +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4, d_month_seq#5] + +(4) CometFilter +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(5) CometProject +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4], [d_date_sk#3, d_date#4] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: [d_date_sk#3, d_date#4] + +(7) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Right output [2]: [d_date_sk#3, d_date#4] +Arguments: [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] +Arguments: [ss_customer_sk#1, d_date#4], [ss_customer_sk#1, d_date#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(10) CometFilter +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(11) CometBroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, d_date#4] +Right output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [ss_customer_sk#1], [c_customer_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_last_name#8, c_first_name#7, d_date#4], [c_last_name#8, c_first_name#7, d_date#4] + +(14) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(15) CometExchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) ReusedExchange [Reuses operator id: 15] +Output [3]: [c_last_name#9, c_first_name#10, d_date#11] + +(19) CometHashAggregate +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Keys [3]: [c_last_name#9, c_first_name#10, d_date#11] +Functions: [] + +(20) ColumnarToRow [codegen id : 1] +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] + +(21) BroadcastExchange +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=2] + +(22) BroadcastHashJoin [codegen id : 3] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#10, ), isnull(c_first_name#10), coalesce(d_date#11, 1970-01-01), isnull(d_date#11)] +Join type: LeftAnti +Join condition: None + +(23) ReusedExchange [Reuses operator id: 21] +Output [3]: [c_last_name#12, c_first_name#13, d_date#14] + +(24) BroadcastHashJoin [codegen id : 3] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#12, ), isnull(c_last_name#12), coalesce(c_first_name#13, ), isnull(c_first_name#13), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)] +Join type: LeftAnti +Join condition: None + +(25) Project [codegen id : 3] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(26) HashAggregate [codegen id : 3] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [1]: [count#16] + +(27) Exchange +Input [1]: [count#16] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(28) HashAggregate [codegen id : 4] +Input [1]: [count#16] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [1]: [count(1)#17 AS count(1)#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8612445652 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_datafusion/simplified.txt @@ -0,0 +1,36 @@ +WholeStageCodegen (4) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (3) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #2 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ss_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ss_customer_sk,d_date] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_customer_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #3 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + ReusedExchange [c_last_name,c_first_name,d_date] #2 + InputAdapter + ReusedExchange [c_last_name,c_first_name,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..20f373e5f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/explain.txt @@ -0,0 +1,281 @@ +== Physical Plan == +* HashAggregate (49) ++- Exchange (48) + +- * HashAggregate (47) + +- * Project (46) + +- * BroadcastHashJoin LeftAnti BuildRight (45) + :- * BroadcastHashJoin LeftAnti BuildRight (31) + : :- * ColumnarToRow (17) + : : +- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.customer (9) + : +- BroadcastExchange (30) + : +- * ColumnarToRow (29) + : +- CometHashAggregate (28) + : +- CometExchange (27) + : +- CometHashAggregate (26) + : +- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometFilter (19) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (18) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (44) + +- * ColumnarToRow (43) + +- CometHashAggregate (42) + +- CometExchange (41) + +- CometHashAggregate (40) + +- CometProject (39) + +- CometBroadcastHashJoin (38) + :- CometProject (36) + : +- CometBroadcastHashJoin (35) + : :- CometFilter (33) + : : +- CometScan parquet spark_catalog.default.web_sales (32) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#2)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(5) CometProject +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4], [d_date_sk#3, d_date#4] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: [d_date_sk#3, d_date#4] + +(7) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Right output [2]: [d_date_sk#3, d_date#4] +Arguments: [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] +Arguments: [ss_customer_sk#1, d_date#4], [ss_customer_sk#1, d_date#4] + +(9) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(11) CometBroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, d_date#4] +Right output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [ss_customer_sk#1], [c_customer_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_last_name#8, c_first_name#7, d_date#4], [c_last_name#8, c_first_name#7, d_date#4] + +(14) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(15) CometExchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) + +(20) ReusedExchange [Reuses operator id: 6] +Output [2]: [d_date_sk#11, d_date#12] + +(21) CometBroadcastHashJoin +Left output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Right output [2]: [d_date_sk#11, d_date#12] +Arguments: [cs_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(22) CometProject +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] +Arguments: [cs_bill_customer_sk#9, d_date#12], [cs_bill_customer_sk#9, d_date#12] + +(23) ReusedExchange [Reuses operator id: 11] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] + +(24) CometBroadcastHashJoin +Left output [2]: [cs_bill_customer_sk#9, d_date#12] +Right output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] +Arguments: [cs_bill_customer_sk#9], [c_customer_sk#13], Inner, BuildRight + +(25) CometProject +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] +Arguments: [c_last_name#15, c_first_name#14, d_date#12], [c_last_name#15, c_first_name#14, d_date#12] + +(26) CometHashAggregate +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] + +(27) CometExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] + +(29) ColumnarToRow [codegen id : 1] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(30) BroadcastExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=3] + +(31) BroadcastHashJoin [codegen id : 3] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] +Join type: LeftAnti +Join condition: None + +(32) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#17)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(33) CometFilter +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) + +(34) ReusedExchange [Reuses operator id: 6] +Output [2]: [d_date_sk#18, d_date#19] + +(35) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Right output [2]: [d_date_sk#18, d_date#19] +Arguments: [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + +(36) CometProject +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] +Arguments: [ws_bill_customer_sk#16, d_date#19], [ws_bill_customer_sk#16, d_date#19] + +(37) ReusedExchange [Reuses operator id: 11] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(38) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#16, d_date#19] +Right output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [ws_bill_customer_sk#16], [c_customer_sk#20], Inner, BuildRight + +(39) CometProject +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_last_name#22, c_first_name#21, d_date#19], [c_last_name#22, c_first_name#21, d_date#19] + +(40) CometHashAggregate +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] + +(41) CometExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(42) CometHashAggregate +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] + +(43) ColumnarToRow [codegen id : 2] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(44) BroadcastExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 3] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] +Join type: LeftAnti +Join condition: None + +(46) Project [codegen id : 3] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(47) HashAggregate [codegen id : 3] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(48) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(49) HashAggregate [codegen id : 4] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..dca3542fe9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q87.native_iceberg_compat/simplified.txt @@ -0,0 +1,59 @@ +WholeStageCodegen (4) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (3) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #2 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ss_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ss_customer_sk,d_date] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_customer_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #3 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #6 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [cs_bill_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [cs_bill_customer_sk,d_date] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_sold_date_sk,d_date_sk,d_date] + CometFilter [cs_bill_customer_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk,d_date] #3 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #8 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ws_bill_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ws_bill_customer_sk,d_date] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_bill_customer_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_date] #3 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/explain.txt new file mode 100644 index 0000000000..1917778c34 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/explain.txt @@ -0,0 +1,873 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (172) +:- * BroadcastNestedLoopJoin Inner BuildRight (151) +: :- * BroadcastNestedLoopJoin Inner BuildRight (130) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (109) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (88) +: : : : :- * BroadcastNestedLoopJoin Inner BuildRight (67) +: : : : : :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : : : : : :- * ColumnarToRow (25) +: : : : : : : +- CometHashAggregate (24) +: : : : : : : +- CometExchange (23) +: : : : : : : +- CometHashAggregate (22) +: : : : : : : +- CometProject (21) +: : : : : : : +- CometBroadcastHashJoin (20) +: : : : : : : :- CometProject (15) +: : : : : : : : +- CometBroadcastHashJoin (14) +: : : : : : : : :- CometProject (9) +: : : : : : : : : +- CometBroadcastHashJoin (8) +: : : : : : : : : :- CometProject (3) +: : : : : : : : : : +- CometFilter (2) +: : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) +: : : : : : : : : +- CometBroadcastExchange (7) +: : : : : : : : : +- CometProject (6) +: : : : : : : : : +- CometFilter (5) +: : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (4) +: : : : : : : : +- CometBroadcastExchange (13) +: : : : : : : : +- CometProject (12) +: : : : : : : : +- CometFilter (11) +: : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (10) +: : : : : : : +- CometBroadcastExchange (19) +: : : : : : : +- CometProject (18) +: : : : : : : +- CometFilter (17) +: : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (16) +: : : : : : +- BroadcastExchange (45) +: : : : : : +- * ColumnarToRow (44) +: : : : : : +- CometHashAggregate (43) +: : : : : : +- CometExchange (42) +: : : : : : +- CometHashAggregate (41) +: : : : : : +- CometProject (40) +: : : : : : +- CometBroadcastHashJoin (39) +: : : : : : :- CometProject (37) +: : : : : : : +- CometBroadcastHashJoin (36) +: : : : : : : :- CometProject (31) +: : : : : : : : +- CometBroadcastHashJoin (30) +: : : : : : : : :- CometProject (28) +: : : : : : : : : +- CometFilter (27) +: : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (26) +: : : : : : : : +- ReusedExchange (29) +: : : : : : : +- CometBroadcastExchange (35) +: : : : : : : +- CometProject (34) +: : : : : : : +- CometFilter (33) +: : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (32) +: : : : : : +- ReusedExchange (38) +: : : : : +- BroadcastExchange (66) +: : : : : +- * ColumnarToRow (65) +: : : : : +- CometHashAggregate (64) +: : : : : +- CometExchange (63) +: : : : : +- CometHashAggregate (62) +: : : : : +- CometProject (61) +: : : : : +- CometBroadcastHashJoin (60) +: : : : : :- CometProject (58) +: : : : : : +- CometBroadcastHashJoin (57) +: : : : : : :- CometProject (52) +: : : : : : : +- CometBroadcastHashJoin (51) +: : : : : : : :- CometProject (49) +: : : : : : : : +- CometFilter (48) +: : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (47) +: : : : : : : +- ReusedExchange (50) +: : : : : : +- CometBroadcastExchange (56) +: : : : : : +- CometProject (55) +: : : : : : +- CometFilter (54) +: : : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (53) +: : : : : +- ReusedExchange (59) +: : : : +- BroadcastExchange (87) +: : : : +- * ColumnarToRow (86) +: : : : +- CometHashAggregate (85) +: : : : +- CometExchange (84) +: : : : +- CometHashAggregate (83) +: : : : +- CometProject (82) +: : : : +- CometBroadcastHashJoin (81) +: : : : :- CometProject (79) +: : : : : +- CometBroadcastHashJoin (78) +: : : : : :- CometProject (73) +: : : : : : +- CometBroadcastHashJoin (72) +: : : : : : :- CometProject (70) +: : : : : : : +- CometFilter (69) +: : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (68) +: : : : : : +- ReusedExchange (71) +: : : : : +- CometBroadcastExchange (77) +: : : : : +- CometProject (76) +: : : : : +- CometFilter (75) +: : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (74) +: : : : +- ReusedExchange (80) +: : : +- BroadcastExchange (108) +: : : +- * ColumnarToRow (107) +: : : +- CometHashAggregate (106) +: : : +- CometExchange (105) +: : : +- CometHashAggregate (104) +: : : +- CometProject (103) +: : : +- CometBroadcastHashJoin (102) +: : : :- CometProject (100) +: : : : +- CometBroadcastHashJoin (99) +: : : : :- CometProject (94) +: : : : : +- CometBroadcastHashJoin (93) +: : : : : :- CometProject (91) +: : : : : : +- CometFilter (90) +: : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (89) +: : : : : +- ReusedExchange (92) +: : : : +- CometBroadcastExchange (98) +: : : : +- CometProject (97) +: : : : +- CometFilter (96) +: : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (95) +: : : +- ReusedExchange (101) +: : +- BroadcastExchange (129) +: : +- * ColumnarToRow (128) +: : +- CometHashAggregate (127) +: : +- CometExchange (126) +: : +- CometHashAggregate (125) +: : +- CometProject (124) +: : +- CometBroadcastHashJoin (123) +: : :- CometProject (121) +: : : +- CometBroadcastHashJoin (120) +: : : :- CometProject (115) +: : : : +- CometBroadcastHashJoin (114) +: : : : :- CometProject (112) +: : : : : +- CometFilter (111) +: : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (110) +: : : : +- ReusedExchange (113) +: : : +- CometBroadcastExchange (119) +: : : +- CometProject (118) +: : : +- CometFilter (117) +: : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (116) +: : +- ReusedExchange (122) +: +- BroadcastExchange (150) +: +- * ColumnarToRow (149) +: +- CometHashAggregate (148) +: +- CometExchange (147) +: +- CometHashAggregate (146) +: +- CometProject (145) +: +- CometBroadcastHashJoin (144) +: :- CometProject (142) +: : +- CometBroadcastHashJoin (141) +: : :- CometProject (136) +: : : +- CometBroadcastHashJoin (135) +: : : :- CometProject (133) +: : : : +- CometFilter (132) +: : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (131) +: : : +- ReusedExchange (134) +: : +- CometBroadcastExchange (140) +: : +- CometProject (139) +: : +- CometFilter (138) +: : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (137) +: +- ReusedExchange (143) ++- BroadcastExchange (171) + +- * ColumnarToRow (170) + +- CometHashAggregate (169) + +- CometExchange (168) + +- CometHashAggregate (167) + +- CometProject (166) + +- CometBroadcastHashJoin (165) + :- CometProject (163) + : +- CometBroadcastHashJoin (162) + : :- CometProject (157) + : : +- CometBroadcastHashJoin (156) + : : :- CometProject (154) + : : : +- CometFilter (153) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (152) + : : +- ReusedExchange (155) + : +- CometBroadcastExchange (161) + : +- CometProject (160) + : +- CometFilter (159) + : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (158) + +- ReusedExchange (164) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(3) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3], [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Arguments: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] + +(5) CometFilter +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Condition : (((((hd_dep_count#6 = 4) AND (hd_vehicle_count#7 <= 6)) OR ((hd_dep_count#6 = 2) AND (hd_vehicle_count#7 <= 4))) OR ((hd_dep_count#6 = 0) AND (hd_vehicle_count#7 <= 2))) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] +Arguments: [ss_sold_time_sk#1, ss_store_sk#3], [ss_sold_time_sk#1, ss_store_sk#3] + +(10) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Arguments: [t_time_sk#8, t_hour#9, t_minute#10] + +(11) CometFilter +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(12) CometProject +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Arguments: [t_time_sk#8], [t_time_sk#8] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: [t_time_sk#8] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Right output [1]: [t_time_sk#8] +Arguments: [ss_sold_time_sk#1], [t_time_sk#8], Inner, BuildRight + +(15) CometProject +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] +Arguments: [ss_store_sk#3], [ss_store_sk#3] + +(16) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_store_name#12] +Arguments: [s_store_sk#11, s_store_name#12] + +(17) CometFilter +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(18) CometProject +Input [2]: [s_store_sk#11, s_store_name#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(19) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(20) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#3] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#3], [s_store_sk#11], Inner, BuildRight + +(21) CometProject +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#13] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 8] +Input [1]: [h8_30_to_9#14] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Arguments: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] + +(27) CometFilter +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Condition : ((isnotnull(ss_hdemo_sk#16) AND isnotnull(ss_sold_time_sk#15)) AND isnotnull(ss_store_sk#17)) + +(28) CometProject +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Arguments: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17], [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17] + +(29) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#19] + +(30) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17] +Right output [1]: [hd_demo_sk#19] +Arguments: [ss_hdemo_sk#16], [hd_demo_sk#19], Inner, BuildRight + +(31) CometProject +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, hd_demo_sk#19] +Arguments: [ss_sold_time_sk#15, ss_store_sk#17], [ss_sold_time_sk#15, ss_store_sk#17] + +(32) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Arguments: [t_time_sk#20, t_hour#21, t_minute#22] + +(33) CometFilter +Input [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Condition : ((((isnotnull(t_hour#21) AND isnotnull(t_minute#22)) AND (t_hour#21 = 9)) AND (t_minute#22 < 30)) AND isnotnull(t_time_sk#20)) + +(34) CometProject +Input [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Arguments: [t_time_sk#20], [t_time_sk#20] + +(35) CometBroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: [t_time_sk#20] + +(36) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#15, ss_store_sk#17] +Right output [1]: [t_time_sk#20] +Arguments: [ss_sold_time_sk#15], [t_time_sk#20], Inner, BuildRight + +(37) CometProject +Input [3]: [ss_sold_time_sk#15, ss_store_sk#17, t_time_sk#20] +Arguments: [ss_store_sk#17], [ss_store_sk#17] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#23] + +(39) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#17] +Right output [1]: [s_store_sk#23] +Arguments: [ss_store_sk#17], [s_store_sk#23], Inner, BuildRight + +(40) CometProject +Input [2]: [ss_store_sk#17, s_store_sk#23] + +(41) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(42) CometExchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(43) CometHashAggregate +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [h9_to_9_30#25] + +(45) BroadcastExchange +Input [1]: [h9_to_9_30#25] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(46) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(47) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Arguments: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] + +(48) CometFilter +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Condition : ((isnotnull(ss_hdemo_sk#27) AND isnotnull(ss_sold_time_sk#26)) AND isnotnull(ss_store_sk#28)) + +(49) CometProject +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Arguments: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28], [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28] + +(50) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#30] + +(51) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28] +Right output [1]: [hd_demo_sk#30] +Arguments: [ss_hdemo_sk#27], [hd_demo_sk#30], Inner, BuildRight + +(52) CometProject +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, hd_demo_sk#30] +Arguments: [ss_sold_time_sk#26, ss_store_sk#28], [ss_sold_time_sk#26, ss_store_sk#28] + +(53) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33] + +(54) CometFilter +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Condition : ((((isnotnull(t_hour#32) AND isnotnull(t_minute#33)) AND (t_hour#32 = 9)) AND (t_minute#33 >= 30)) AND isnotnull(t_time_sk#31)) + +(55) CometProject +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31], [t_time_sk#31] + +(56) CometBroadcastExchange +Input [1]: [t_time_sk#31] +Arguments: [t_time_sk#31] + +(57) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#26, ss_store_sk#28] +Right output [1]: [t_time_sk#31] +Arguments: [ss_sold_time_sk#26], [t_time_sk#31], Inner, BuildRight + +(58) CometProject +Input [3]: [ss_sold_time_sk#26, ss_store_sk#28, t_time_sk#31] +Arguments: [ss_store_sk#28], [ss_store_sk#28] + +(59) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#34] + +(60) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#28] +Right output [1]: [s_store_sk#34] +Arguments: [ss_store_sk#28], [s_store_sk#34], Inner, BuildRight + +(61) CometProject +Input [2]: [ss_store_sk#28, s_store_sk#34] + +(62) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(63) CometExchange +Input [1]: [count#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(64) CometHashAggregate +Input [1]: [count#35] +Keys: [] +Functions [1]: [count(1)] + +(65) ColumnarToRow [codegen id : 2] +Input [1]: [h9_30_to_10#36] + +(66) BroadcastExchange +Input [1]: [h9_30_to_10#36] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(67) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(68) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Arguments: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] + +(69) CometFilter +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Condition : ((isnotnull(ss_hdemo_sk#38) AND isnotnull(ss_sold_time_sk#37)) AND isnotnull(ss_store_sk#39)) + +(70) CometProject +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Arguments: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39], [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] + +(71) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#41] + +(72) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] +Right output [1]: [hd_demo_sk#41] +Arguments: [ss_hdemo_sk#38], [hd_demo_sk#41], Inner, BuildRight + +(73) CometProject +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, hd_demo_sk#41] +Arguments: [ss_sold_time_sk#37, ss_store_sk#39], [ss_sold_time_sk#37, ss_store_sk#39] + +(74) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Arguments: [t_time_sk#42, t_hour#43, t_minute#44] + +(75) CometFilter +Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Condition : ((((isnotnull(t_hour#43) AND isnotnull(t_minute#44)) AND (t_hour#43 = 10)) AND (t_minute#44 < 30)) AND isnotnull(t_time_sk#42)) + +(76) CometProject +Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Arguments: [t_time_sk#42], [t_time_sk#42] + +(77) CometBroadcastExchange +Input [1]: [t_time_sk#42] +Arguments: [t_time_sk#42] + +(78) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#37, ss_store_sk#39] +Right output [1]: [t_time_sk#42] +Arguments: [ss_sold_time_sk#37], [t_time_sk#42], Inner, BuildRight + +(79) CometProject +Input [3]: [ss_sold_time_sk#37, ss_store_sk#39, t_time_sk#42] +Arguments: [ss_store_sk#39], [ss_store_sk#39] + +(80) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#45] + +(81) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#39] +Right output [1]: [s_store_sk#45] +Arguments: [ss_store_sk#39], [s_store_sk#45], Inner, BuildRight + +(82) CometProject +Input [2]: [ss_store_sk#39, s_store_sk#45] + +(83) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(84) CometExchange +Input [1]: [count#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(85) CometHashAggregate +Input [1]: [count#46] +Keys: [] +Functions [1]: [count(1)] + +(86) ColumnarToRow [codegen id : 3] +Input [1]: [h10_to_10_30#47] + +(87) BroadcastExchange +Input [1]: [h10_to_10_30#47] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(88) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(89) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Arguments: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] + +(90) CometFilter +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Condition : ((isnotnull(ss_hdemo_sk#49) AND isnotnull(ss_sold_time_sk#48)) AND isnotnull(ss_store_sk#50)) + +(91) CometProject +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Arguments: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50], [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50] + +(92) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#52] + +(93) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50] +Right output [1]: [hd_demo_sk#52] +Arguments: [ss_hdemo_sk#49], [hd_demo_sk#52], Inner, BuildRight + +(94) CometProject +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, hd_demo_sk#52] +Arguments: [ss_sold_time_sk#48, ss_store_sk#50], [ss_sold_time_sk#48, ss_store_sk#50] + +(95) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Arguments: [t_time_sk#53, t_hour#54, t_minute#55] + +(96) CometFilter +Input [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Condition : ((((isnotnull(t_hour#54) AND isnotnull(t_minute#55)) AND (t_hour#54 = 10)) AND (t_minute#55 >= 30)) AND isnotnull(t_time_sk#53)) + +(97) CometProject +Input [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Arguments: [t_time_sk#53], [t_time_sk#53] + +(98) CometBroadcastExchange +Input [1]: [t_time_sk#53] +Arguments: [t_time_sk#53] + +(99) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#48, ss_store_sk#50] +Right output [1]: [t_time_sk#53] +Arguments: [ss_sold_time_sk#48], [t_time_sk#53], Inner, BuildRight + +(100) CometProject +Input [3]: [ss_sold_time_sk#48, ss_store_sk#50, t_time_sk#53] +Arguments: [ss_store_sk#50], [ss_store_sk#50] + +(101) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#56] + +(102) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#50] +Right output [1]: [s_store_sk#56] +Arguments: [ss_store_sk#50], [s_store_sk#56], Inner, BuildRight + +(103) CometProject +Input [2]: [ss_store_sk#50, s_store_sk#56] + +(104) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(105) CometExchange +Input [1]: [count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(106) CometHashAggregate +Input [1]: [count#57] +Keys: [] +Functions [1]: [count(1)] + +(107) ColumnarToRow [codegen id : 4] +Input [1]: [h10_30_to_11#58] + +(108) BroadcastExchange +Input [1]: [h10_30_to_11#58] +Arguments: IdentityBroadcastMode, [plan_id=9] + +(109) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(110) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Arguments: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] + +(111) CometFilter +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_hdemo_sk#60) AND isnotnull(ss_sold_time_sk#59)) AND isnotnull(ss_store_sk#61)) + +(112) CometProject +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Arguments: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61], [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61] + +(113) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#63] + +(114) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61] +Right output [1]: [hd_demo_sk#63] +Arguments: [ss_hdemo_sk#60], [hd_demo_sk#63], Inner, BuildRight + +(115) CometProject +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, hd_demo_sk#63] +Arguments: [ss_sold_time_sk#59, ss_store_sk#61], [ss_sold_time_sk#59, ss_store_sk#61] + +(116) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Arguments: [t_time_sk#64, t_hour#65, t_minute#66] + +(117) CometFilter +Input [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Condition : ((((isnotnull(t_hour#65) AND isnotnull(t_minute#66)) AND (t_hour#65 = 11)) AND (t_minute#66 < 30)) AND isnotnull(t_time_sk#64)) + +(118) CometProject +Input [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Arguments: [t_time_sk#64], [t_time_sk#64] + +(119) CometBroadcastExchange +Input [1]: [t_time_sk#64] +Arguments: [t_time_sk#64] + +(120) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#59, ss_store_sk#61] +Right output [1]: [t_time_sk#64] +Arguments: [ss_sold_time_sk#59], [t_time_sk#64], Inner, BuildRight + +(121) CometProject +Input [3]: [ss_sold_time_sk#59, ss_store_sk#61, t_time_sk#64] +Arguments: [ss_store_sk#61], [ss_store_sk#61] + +(122) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#67] + +(123) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#61] +Right output [1]: [s_store_sk#67] +Arguments: [ss_store_sk#61], [s_store_sk#67], Inner, BuildRight + +(124) CometProject +Input [2]: [ss_store_sk#61, s_store_sk#67] + +(125) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(126) CometExchange +Input [1]: [count#68] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=10] + +(127) CometHashAggregate +Input [1]: [count#68] +Keys: [] +Functions [1]: [count(1)] + +(128) ColumnarToRow [codegen id : 5] +Input [1]: [h11_to_11_30#69] + +(129) BroadcastExchange +Input [1]: [h11_to_11_30#69] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(130) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(131) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Arguments: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] + +(132) CometFilter +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Condition : ((isnotnull(ss_hdemo_sk#71) AND isnotnull(ss_sold_time_sk#70)) AND isnotnull(ss_store_sk#72)) + +(133) CometProject +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Arguments: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72], [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72] + +(134) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#74] + +(135) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72] +Right output [1]: [hd_demo_sk#74] +Arguments: [ss_hdemo_sk#71], [hd_demo_sk#74], Inner, BuildRight + +(136) CometProject +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, hd_demo_sk#74] +Arguments: [ss_sold_time_sk#70, ss_store_sk#72], [ss_sold_time_sk#70, ss_store_sk#72] + +(137) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Arguments: [t_time_sk#75, t_hour#76, t_minute#77] + +(138) CometFilter +Input [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Condition : ((((isnotnull(t_hour#76) AND isnotnull(t_minute#77)) AND (t_hour#76 = 11)) AND (t_minute#77 >= 30)) AND isnotnull(t_time_sk#75)) + +(139) CometProject +Input [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Arguments: [t_time_sk#75], [t_time_sk#75] + +(140) CometBroadcastExchange +Input [1]: [t_time_sk#75] +Arguments: [t_time_sk#75] + +(141) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#70, ss_store_sk#72] +Right output [1]: [t_time_sk#75] +Arguments: [ss_sold_time_sk#70], [t_time_sk#75], Inner, BuildRight + +(142) CometProject +Input [3]: [ss_sold_time_sk#70, ss_store_sk#72, t_time_sk#75] +Arguments: [ss_store_sk#72], [ss_store_sk#72] + +(143) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#78] + +(144) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#72] +Right output [1]: [s_store_sk#78] +Arguments: [ss_store_sk#72], [s_store_sk#78], Inner, BuildRight + +(145) CometProject +Input [2]: [ss_store_sk#72, s_store_sk#78] + +(146) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(147) CometExchange +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(148) CometHashAggregate +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] + +(149) ColumnarToRow [codegen id : 6] +Input [1]: [h11_30_to_12#80] + +(150) BroadcastExchange +Input [1]: [h11_30_to_12#80] +Arguments: IdentityBroadcastMode, [plan_id=13] + +(151) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(152) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Arguments: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] + +(153) CometFilter +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Condition : ((isnotnull(ss_hdemo_sk#82) AND isnotnull(ss_sold_time_sk#81)) AND isnotnull(ss_store_sk#83)) + +(154) CometProject +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Arguments: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83], [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83] + +(155) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#85] + +(156) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83] +Right output [1]: [hd_demo_sk#85] +Arguments: [ss_hdemo_sk#82], [hd_demo_sk#85], Inner, BuildRight + +(157) CometProject +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, hd_demo_sk#85] +Arguments: [ss_sold_time_sk#81, ss_store_sk#83], [ss_sold_time_sk#81, ss_store_sk#83] + +(158) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Arguments: [t_time_sk#86, t_hour#87, t_minute#88] + +(159) CometFilter +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Condition : ((((isnotnull(t_hour#87) AND isnotnull(t_minute#88)) AND (t_hour#87 = 12)) AND (t_minute#88 < 30)) AND isnotnull(t_time_sk#86)) + +(160) CometProject +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Arguments: [t_time_sk#86], [t_time_sk#86] + +(161) CometBroadcastExchange +Input [1]: [t_time_sk#86] +Arguments: [t_time_sk#86] + +(162) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#81, ss_store_sk#83] +Right output [1]: [t_time_sk#86] +Arguments: [ss_sold_time_sk#81], [t_time_sk#86], Inner, BuildRight + +(163) CometProject +Input [3]: [ss_sold_time_sk#81, ss_store_sk#83, t_time_sk#86] +Arguments: [ss_store_sk#83], [ss_store_sk#83] + +(164) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#89] + +(165) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#83] +Right output [1]: [s_store_sk#89] +Arguments: [ss_store_sk#83], [s_store_sk#89], Inner, BuildRight + +(166) CometProject +Input [2]: [ss_store_sk#83, s_store_sk#89] + +(167) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(168) CometExchange +Input [1]: [count#90] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=14] + +(169) CometHashAggregate +Input [1]: [count#90] +Keys: [] +Functions [1]: [count(1)] + +(170) ColumnarToRow [codegen id : 7] +Input [1]: [h12_to_12_30#91] + +(171) BroadcastExchange +Input [1]: [h12_to_12_30#91] +Arguments: IdentityBroadcastMode, [plan_id=15] + +(172) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5acfeba131 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_datafusion/simplified.txt @@ -0,0 +1,195 @@ +WholeStageCodegen (8) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [h8_30_to_9,count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [h9_to_9_30,count,count(1)] + CometExchange #6 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #7 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [h9_30_to_10,count,count(1)] + CometExchange #9 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #10 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometHashAggregate [h10_to_10_30,count,count(1)] + CometExchange #12 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #13 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometHashAggregate [h10_30_to_11,count,count(1)] + CometExchange #15 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #16 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometHashAggregate [h11_to_11_30,count,count(1)] + CometExchange #18 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #19 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometHashAggregate [h11_30_to_12,count,count(1)] + CometExchange #21 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #22 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometHashAggregate [h12_to_12_30,count,count(1)] + CometExchange #24 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #25 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..592e23cd27 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/explain.txt @@ -0,0 +1,927 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (172) +:- * BroadcastNestedLoopJoin Inner BuildRight (151) +: :- * BroadcastNestedLoopJoin Inner BuildRight (130) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (109) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (88) +: : : : :- * BroadcastNestedLoopJoin Inner BuildRight (67) +: : : : : :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : : : : : :- * ColumnarToRow (25) +: : : : : : : +- CometHashAggregate (24) +: : : : : : : +- CometExchange (23) +: : : : : : : +- CometHashAggregate (22) +: : : : : : : +- CometProject (21) +: : : : : : : +- CometBroadcastHashJoin (20) +: : : : : : : :- CometProject (15) +: : : : : : : : +- CometBroadcastHashJoin (14) +: : : : : : : : :- CometProject (9) +: : : : : : : : : +- CometBroadcastHashJoin (8) +: : : : : : : : : :- CometProject (3) +: : : : : : : : : : +- CometFilter (2) +: : : : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) +: : : : : : : : : +- CometBroadcastExchange (7) +: : : : : : : : : +- CometProject (6) +: : : : : : : : : +- CometFilter (5) +: : : : : : : : : +- CometScan parquet spark_catalog.default.household_demographics (4) +: : : : : : : : +- CometBroadcastExchange (13) +: : : : : : : : +- CometProject (12) +: : : : : : : : +- CometFilter (11) +: : : : : : : : +- CometScan parquet spark_catalog.default.time_dim (10) +: : : : : : : +- CometBroadcastExchange (19) +: : : : : : : +- CometProject (18) +: : : : : : : +- CometFilter (17) +: : : : : : : +- CometScan parquet spark_catalog.default.store (16) +: : : : : : +- BroadcastExchange (45) +: : : : : : +- * ColumnarToRow (44) +: : : : : : +- CometHashAggregate (43) +: : : : : : +- CometExchange (42) +: : : : : : +- CometHashAggregate (41) +: : : : : : +- CometProject (40) +: : : : : : +- CometBroadcastHashJoin (39) +: : : : : : :- CometProject (37) +: : : : : : : +- CometBroadcastHashJoin (36) +: : : : : : : :- CometProject (31) +: : : : : : : : +- CometBroadcastHashJoin (30) +: : : : : : : : :- CometProject (28) +: : : : : : : : : +- CometFilter (27) +: : : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (26) +: : : : : : : : +- ReusedExchange (29) +: : : : : : : +- CometBroadcastExchange (35) +: : : : : : : +- CometProject (34) +: : : : : : : +- CometFilter (33) +: : : : : : : +- CometScan parquet spark_catalog.default.time_dim (32) +: : : : : : +- ReusedExchange (38) +: : : : : +- BroadcastExchange (66) +: : : : : +- * ColumnarToRow (65) +: : : : : +- CometHashAggregate (64) +: : : : : +- CometExchange (63) +: : : : : +- CometHashAggregate (62) +: : : : : +- CometProject (61) +: : : : : +- CometBroadcastHashJoin (60) +: : : : : :- CometProject (58) +: : : : : : +- CometBroadcastHashJoin (57) +: : : : : : :- CometProject (52) +: : : : : : : +- CometBroadcastHashJoin (51) +: : : : : : : :- CometProject (49) +: : : : : : : : +- CometFilter (48) +: : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (47) +: : : : : : : +- ReusedExchange (50) +: : : : : : +- CometBroadcastExchange (56) +: : : : : : +- CometProject (55) +: : : : : : +- CometFilter (54) +: : : : : : +- CometScan parquet spark_catalog.default.time_dim (53) +: : : : : +- ReusedExchange (59) +: : : : +- BroadcastExchange (87) +: : : : +- * ColumnarToRow (86) +: : : : +- CometHashAggregate (85) +: : : : +- CometExchange (84) +: : : : +- CometHashAggregate (83) +: : : : +- CometProject (82) +: : : : +- CometBroadcastHashJoin (81) +: : : : :- CometProject (79) +: : : : : +- CometBroadcastHashJoin (78) +: : : : : :- CometProject (73) +: : : : : : +- CometBroadcastHashJoin (72) +: : : : : : :- CometProject (70) +: : : : : : : +- CometFilter (69) +: : : : : : : +- CometScan parquet spark_catalog.default.store_sales (68) +: : : : : : +- ReusedExchange (71) +: : : : : +- CometBroadcastExchange (77) +: : : : : +- CometProject (76) +: : : : : +- CometFilter (75) +: : : : : +- CometScan parquet spark_catalog.default.time_dim (74) +: : : : +- ReusedExchange (80) +: : : +- BroadcastExchange (108) +: : : +- * ColumnarToRow (107) +: : : +- CometHashAggregate (106) +: : : +- CometExchange (105) +: : : +- CometHashAggregate (104) +: : : +- CometProject (103) +: : : +- CometBroadcastHashJoin (102) +: : : :- CometProject (100) +: : : : +- CometBroadcastHashJoin (99) +: : : : :- CometProject (94) +: : : : : +- CometBroadcastHashJoin (93) +: : : : : :- CometProject (91) +: : : : : : +- CometFilter (90) +: : : : : : +- CometScan parquet spark_catalog.default.store_sales (89) +: : : : : +- ReusedExchange (92) +: : : : +- CometBroadcastExchange (98) +: : : : +- CometProject (97) +: : : : +- CometFilter (96) +: : : : +- CometScan parquet spark_catalog.default.time_dim (95) +: : : +- ReusedExchange (101) +: : +- BroadcastExchange (129) +: : +- * ColumnarToRow (128) +: : +- CometHashAggregate (127) +: : +- CometExchange (126) +: : +- CometHashAggregate (125) +: : +- CometProject (124) +: : +- CometBroadcastHashJoin (123) +: : :- CometProject (121) +: : : +- CometBroadcastHashJoin (120) +: : : :- CometProject (115) +: : : : +- CometBroadcastHashJoin (114) +: : : : :- CometProject (112) +: : : : : +- CometFilter (111) +: : : : : +- CometScan parquet spark_catalog.default.store_sales (110) +: : : : +- ReusedExchange (113) +: : : +- CometBroadcastExchange (119) +: : : +- CometProject (118) +: : : +- CometFilter (117) +: : : +- CometScan parquet spark_catalog.default.time_dim (116) +: : +- ReusedExchange (122) +: +- BroadcastExchange (150) +: +- * ColumnarToRow (149) +: +- CometHashAggregate (148) +: +- CometExchange (147) +: +- CometHashAggregate (146) +: +- CometProject (145) +: +- CometBroadcastHashJoin (144) +: :- CometProject (142) +: : +- CometBroadcastHashJoin (141) +: : :- CometProject (136) +: : : +- CometBroadcastHashJoin (135) +: : : :- CometProject (133) +: : : : +- CometFilter (132) +: : : : +- CometScan parquet spark_catalog.default.store_sales (131) +: : : +- ReusedExchange (134) +: : +- CometBroadcastExchange (140) +: : +- CometProject (139) +: : +- CometFilter (138) +: : +- CometScan parquet spark_catalog.default.time_dim (137) +: +- ReusedExchange (143) ++- BroadcastExchange (171) + +- * ColumnarToRow (170) + +- CometHashAggregate (169) + +- CometExchange (168) + +- CometHashAggregate (167) + +- CometProject (166) + +- CometBroadcastHashJoin (165) + :- CometProject (163) + : +- CometBroadcastHashJoin (162) + : :- CometProject (157) + : : +- CometBroadcastHashJoin (156) + : : :- CometProject (154) + : : : +- CometFilter (153) + : : : +- CometScan parquet spark_catalog.default.store_sales (152) + : : +- ReusedExchange (155) + : +- CometBroadcastExchange (161) + : +- CometProject (160) + : +- CometFilter (159) + : +- CometScan parquet spark_catalog.default.time_dim (158) + +- ReusedExchange (164) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(3) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3], [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(4) CometScan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Condition : (((((hd_dep_count#6 = 4) AND (hd_vehicle_count#7 <= 6)) OR ((hd_dep_count#6 = 2) AND (hd_vehicle_count#7 <= 4))) OR ((hd_dep_count#6 = 0) AND (hd_vehicle_count#7 <= 2))) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] +Arguments: [ss_sold_time_sk#1, ss_store_sk#3], [ss_sold_time_sk#1, ss_store_sk#3] + +(10) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(11) CometFilter +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(12) CometProject +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Arguments: [t_time_sk#8], [t_time_sk#8] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: [t_time_sk#8] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Right output [1]: [t_time_sk#8] +Arguments: [ss_sold_time_sk#1], [t_time_sk#8], Inner, BuildRight + +(15) CometProject +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] +Arguments: [ss_store_sk#3], [ss_store_sk#3] + +(16) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(18) CometProject +Input [2]: [s_store_sk#11, s_store_name#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(19) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(20) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#3] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#3], [s_store_sk#11], Inner, BuildRight + +(21) CometProject +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#13] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 8] +Input [1]: [h8_30_to_9#14] + +(26) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(27) CometFilter +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Condition : ((isnotnull(ss_hdemo_sk#16) AND isnotnull(ss_sold_time_sk#15)) AND isnotnull(ss_store_sk#17)) + +(28) CometProject +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Arguments: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17], [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17] + +(29) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#19] + +(30) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17] +Right output [1]: [hd_demo_sk#19] +Arguments: [ss_hdemo_sk#16], [hd_demo_sk#19], Inner, BuildRight + +(31) CometProject +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, hd_demo_sk#19] +Arguments: [ss_sold_time_sk#15, ss_store_sk#17], [ss_sold_time_sk#15, ss_store_sk#17] + +(32) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(33) CometFilter +Input [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Condition : ((((isnotnull(t_hour#21) AND isnotnull(t_minute#22)) AND (t_hour#21 = 9)) AND (t_minute#22 < 30)) AND isnotnull(t_time_sk#20)) + +(34) CometProject +Input [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Arguments: [t_time_sk#20], [t_time_sk#20] + +(35) CometBroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: [t_time_sk#20] + +(36) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#15, ss_store_sk#17] +Right output [1]: [t_time_sk#20] +Arguments: [ss_sold_time_sk#15], [t_time_sk#20], Inner, BuildRight + +(37) CometProject +Input [3]: [ss_sold_time_sk#15, ss_store_sk#17, t_time_sk#20] +Arguments: [ss_store_sk#17], [ss_store_sk#17] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#23] + +(39) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#17] +Right output [1]: [s_store_sk#23] +Arguments: [ss_store_sk#17], [s_store_sk#23], Inner, BuildRight + +(40) CometProject +Input [2]: [ss_store_sk#17, s_store_sk#23] + +(41) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(42) CometExchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(43) CometHashAggregate +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [h9_to_9_30#25] + +(45) BroadcastExchange +Input [1]: [h9_to_9_30#25] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(46) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(47) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(48) CometFilter +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Condition : ((isnotnull(ss_hdemo_sk#27) AND isnotnull(ss_sold_time_sk#26)) AND isnotnull(ss_store_sk#28)) + +(49) CometProject +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Arguments: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28], [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28] + +(50) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#30] + +(51) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28] +Right output [1]: [hd_demo_sk#30] +Arguments: [ss_hdemo_sk#27], [hd_demo_sk#30], Inner, BuildRight + +(52) CometProject +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, hd_demo_sk#30] +Arguments: [ss_sold_time_sk#26, ss_store_sk#28], [ss_sold_time_sk#26, ss_store_sk#28] + +(53) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(54) CometFilter +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Condition : ((((isnotnull(t_hour#32) AND isnotnull(t_minute#33)) AND (t_hour#32 = 9)) AND (t_minute#33 >= 30)) AND isnotnull(t_time_sk#31)) + +(55) CometProject +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31], [t_time_sk#31] + +(56) CometBroadcastExchange +Input [1]: [t_time_sk#31] +Arguments: [t_time_sk#31] + +(57) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#26, ss_store_sk#28] +Right output [1]: [t_time_sk#31] +Arguments: [ss_sold_time_sk#26], [t_time_sk#31], Inner, BuildRight + +(58) CometProject +Input [3]: [ss_sold_time_sk#26, ss_store_sk#28, t_time_sk#31] +Arguments: [ss_store_sk#28], [ss_store_sk#28] + +(59) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#34] + +(60) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#28] +Right output [1]: [s_store_sk#34] +Arguments: [ss_store_sk#28], [s_store_sk#34], Inner, BuildRight + +(61) CometProject +Input [2]: [ss_store_sk#28, s_store_sk#34] + +(62) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(63) CometExchange +Input [1]: [count#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(64) CometHashAggregate +Input [1]: [count#35] +Keys: [] +Functions [1]: [count(1)] + +(65) ColumnarToRow [codegen id : 2] +Input [1]: [h9_30_to_10#36] + +(66) BroadcastExchange +Input [1]: [h9_30_to_10#36] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(67) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(68) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(69) CometFilter +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Condition : ((isnotnull(ss_hdemo_sk#38) AND isnotnull(ss_sold_time_sk#37)) AND isnotnull(ss_store_sk#39)) + +(70) CometProject +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Arguments: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39], [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] + +(71) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#41] + +(72) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] +Right output [1]: [hd_demo_sk#41] +Arguments: [ss_hdemo_sk#38], [hd_demo_sk#41], Inner, BuildRight + +(73) CometProject +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, hd_demo_sk#41] +Arguments: [ss_sold_time_sk#37, ss_store_sk#39], [ss_sold_time_sk#37, ss_store_sk#39] + +(74) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(75) CometFilter +Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Condition : ((((isnotnull(t_hour#43) AND isnotnull(t_minute#44)) AND (t_hour#43 = 10)) AND (t_minute#44 < 30)) AND isnotnull(t_time_sk#42)) + +(76) CometProject +Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Arguments: [t_time_sk#42], [t_time_sk#42] + +(77) CometBroadcastExchange +Input [1]: [t_time_sk#42] +Arguments: [t_time_sk#42] + +(78) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#37, ss_store_sk#39] +Right output [1]: [t_time_sk#42] +Arguments: [ss_sold_time_sk#37], [t_time_sk#42], Inner, BuildRight + +(79) CometProject +Input [3]: [ss_sold_time_sk#37, ss_store_sk#39, t_time_sk#42] +Arguments: [ss_store_sk#39], [ss_store_sk#39] + +(80) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#45] + +(81) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#39] +Right output [1]: [s_store_sk#45] +Arguments: [ss_store_sk#39], [s_store_sk#45], Inner, BuildRight + +(82) CometProject +Input [2]: [ss_store_sk#39, s_store_sk#45] + +(83) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(84) CometExchange +Input [1]: [count#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(85) CometHashAggregate +Input [1]: [count#46] +Keys: [] +Functions [1]: [count(1)] + +(86) ColumnarToRow [codegen id : 3] +Input [1]: [h10_to_10_30#47] + +(87) BroadcastExchange +Input [1]: [h10_to_10_30#47] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(88) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(89) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(90) CometFilter +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Condition : ((isnotnull(ss_hdemo_sk#49) AND isnotnull(ss_sold_time_sk#48)) AND isnotnull(ss_store_sk#50)) + +(91) CometProject +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Arguments: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50], [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50] + +(92) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#52] + +(93) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50] +Right output [1]: [hd_demo_sk#52] +Arguments: [ss_hdemo_sk#49], [hd_demo_sk#52], Inner, BuildRight + +(94) CometProject +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, hd_demo_sk#52] +Arguments: [ss_sold_time_sk#48, ss_store_sk#50], [ss_sold_time_sk#48, ss_store_sk#50] + +(95) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(96) CometFilter +Input [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Condition : ((((isnotnull(t_hour#54) AND isnotnull(t_minute#55)) AND (t_hour#54 = 10)) AND (t_minute#55 >= 30)) AND isnotnull(t_time_sk#53)) + +(97) CometProject +Input [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Arguments: [t_time_sk#53], [t_time_sk#53] + +(98) CometBroadcastExchange +Input [1]: [t_time_sk#53] +Arguments: [t_time_sk#53] + +(99) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#48, ss_store_sk#50] +Right output [1]: [t_time_sk#53] +Arguments: [ss_sold_time_sk#48], [t_time_sk#53], Inner, BuildRight + +(100) CometProject +Input [3]: [ss_sold_time_sk#48, ss_store_sk#50, t_time_sk#53] +Arguments: [ss_store_sk#50], [ss_store_sk#50] + +(101) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#56] + +(102) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#50] +Right output [1]: [s_store_sk#56] +Arguments: [ss_store_sk#50], [s_store_sk#56], Inner, BuildRight + +(103) CometProject +Input [2]: [ss_store_sk#50, s_store_sk#56] + +(104) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(105) CometExchange +Input [1]: [count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(106) CometHashAggregate +Input [1]: [count#57] +Keys: [] +Functions [1]: [count(1)] + +(107) ColumnarToRow [codegen id : 4] +Input [1]: [h10_30_to_11#58] + +(108) BroadcastExchange +Input [1]: [h10_30_to_11#58] +Arguments: IdentityBroadcastMode, [plan_id=9] + +(109) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(110) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(111) CometFilter +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_hdemo_sk#60) AND isnotnull(ss_sold_time_sk#59)) AND isnotnull(ss_store_sk#61)) + +(112) CometProject +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Arguments: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61], [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61] + +(113) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#63] + +(114) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61] +Right output [1]: [hd_demo_sk#63] +Arguments: [ss_hdemo_sk#60], [hd_demo_sk#63], Inner, BuildRight + +(115) CometProject +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, hd_demo_sk#63] +Arguments: [ss_sold_time_sk#59, ss_store_sk#61], [ss_sold_time_sk#59, ss_store_sk#61] + +(116) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(117) CometFilter +Input [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Condition : ((((isnotnull(t_hour#65) AND isnotnull(t_minute#66)) AND (t_hour#65 = 11)) AND (t_minute#66 < 30)) AND isnotnull(t_time_sk#64)) + +(118) CometProject +Input [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Arguments: [t_time_sk#64], [t_time_sk#64] + +(119) CometBroadcastExchange +Input [1]: [t_time_sk#64] +Arguments: [t_time_sk#64] + +(120) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#59, ss_store_sk#61] +Right output [1]: [t_time_sk#64] +Arguments: [ss_sold_time_sk#59], [t_time_sk#64], Inner, BuildRight + +(121) CometProject +Input [3]: [ss_sold_time_sk#59, ss_store_sk#61, t_time_sk#64] +Arguments: [ss_store_sk#61], [ss_store_sk#61] + +(122) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#67] + +(123) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#61] +Right output [1]: [s_store_sk#67] +Arguments: [ss_store_sk#61], [s_store_sk#67], Inner, BuildRight + +(124) CometProject +Input [2]: [ss_store_sk#61, s_store_sk#67] + +(125) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(126) CometExchange +Input [1]: [count#68] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=10] + +(127) CometHashAggregate +Input [1]: [count#68] +Keys: [] +Functions [1]: [count(1)] + +(128) ColumnarToRow [codegen id : 5] +Input [1]: [h11_to_11_30#69] + +(129) BroadcastExchange +Input [1]: [h11_to_11_30#69] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(130) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(131) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(132) CometFilter +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Condition : ((isnotnull(ss_hdemo_sk#71) AND isnotnull(ss_sold_time_sk#70)) AND isnotnull(ss_store_sk#72)) + +(133) CometProject +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Arguments: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72], [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72] + +(134) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#74] + +(135) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72] +Right output [1]: [hd_demo_sk#74] +Arguments: [ss_hdemo_sk#71], [hd_demo_sk#74], Inner, BuildRight + +(136) CometProject +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, hd_demo_sk#74] +Arguments: [ss_sold_time_sk#70, ss_store_sk#72], [ss_sold_time_sk#70, ss_store_sk#72] + +(137) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(138) CometFilter +Input [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Condition : ((((isnotnull(t_hour#76) AND isnotnull(t_minute#77)) AND (t_hour#76 = 11)) AND (t_minute#77 >= 30)) AND isnotnull(t_time_sk#75)) + +(139) CometProject +Input [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Arguments: [t_time_sk#75], [t_time_sk#75] + +(140) CometBroadcastExchange +Input [1]: [t_time_sk#75] +Arguments: [t_time_sk#75] + +(141) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#70, ss_store_sk#72] +Right output [1]: [t_time_sk#75] +Arguments: [ss_sold_time_sk#70], [t_time_sk#75], Inner, BuildRight + +(142) CometProject +Input [3]: [ss_sold_time_sk#70, ss_store_sk#72, t_time_sk#75] +Arguments: [ss_store_sk#72], [ss_store_sk#72] + +(143) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#78] + +(144) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#72] +Right output [1]: [s_store_sk#78] +Arguments: [ss_store_sk#72], [s_store_sk#78], Inner, BuildRight + +(145) CometProject +Input [2]: [ss_store_sk#72, s_store_sk#78] + +(146) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(147) CometExchange +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(148) CometHashAggregate +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] + +(149) ColumnarToRow [codegen id : 6] +Input [1]: [h11_30_to_12#80] + +(150) BroadcastExchange +Input [1]: [h11_30_to_12#80] +Arguments: IdentityBroadcastMode, [plan_id=13] + +(151) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(152) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(153) CometFilter +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Condition : ((isnotnull(ss_hdemo_sk#82) AND isnotnull(ss_sold_time_sk#81)) AND isnotnull(ss_store_sk#83)) + +(154) CometProject +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Arguments: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83], [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83] + +(155) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#85] + +(156) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83] +Right output [1]: [hd_demo_sk#85] +Arguments: [ss_hdemo_sk#82], [hd_demo_sk#85], Inner, BuildRight + +(157) CometProject +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, hd_demo_sk#85] +Arguments: [ss_sold_time_sk#81, ss_store_sk#83], [ss_sold_time_sk#81, ss_store_sk#83] + +(158) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(159) CometFilter +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Condition : ((((isnotnull(t_hour#87) AND isnotnull(t_minute#88)) AND (t_hour#87 = 12)) AND (t_minute#88 < 30)) AND isnotnull(t_time_sk#86)) + +(160) CometProject +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Arguments: [t_time_sk#86], [t_time_sk#86] + +(161) CometBroadcastExchange +Input [1]: [t_time_sk#86] +Arguments: [t_time_sk#86] + +(162) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#81, ss_store_sk#83] +Right output [1]: [t_time_sk#86] +Arguments: [ss_sold_time_sk#81], [t_time_sk#86], Inner, BuildRight + +(163) CometProject +Input [3]: [ss_sold_time_sk#81, ss_store_sk#83, t_time_sk#86] +Arguments: [ss_store_sk#83], [ss_store_sk#83] + +(164) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#89] + +(165) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#83] +Right output [1]: [s_store_sk#89] +Arguments: [ss_store_sk#83], [s_store_sk#89], Inner, BuildRight + +(166) CometProject +Input [2]: [ss_store_sk#83, s_store_sk#89] + +(167) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(168) CometExchange +Input [1]: [count#90] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=14] + +(169) CometHashAggregate +Input [1]: [count#90] +Keys: [] +Functions [1]: [count(1)] + +(170) ColumnarToRow [codegen id : 7] +Input [1]: [h12_to_12_30#91] + +(171) BroadcastExchange +Input [1]: [h12_to_12_30#91] +Arguments: IdentityBroadcastMode, [plan_id=15] + +(172) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1bb61b6c92 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q88.native_iceberg_compat/simplified.txt @@ -0,0 +1,195 @@ +WholeStageCodegen (8) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [h8_30_to_9,count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_store_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [h9_to_9_30,count,count(1)] + CometExchange #6 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #7 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [h9_30_to_10,count,count(1)] + CometExchange #9 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #10 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometHashAggregate [h10_to_10_30,count,count(1)] + CometExchange #12 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #13 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometHashAggregate [h10_30_to_11,count,count(1)] + CometExchange #15 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #16 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometHashAggregate [h11_to_11_30,count,count(1)] + CometExchange #18 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #19 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometHashAggregate [h11_30_to_12,count,count(1)] + CometExchange #21 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #22 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometHashAggregate [h12_to_12_30,count,count(1)] + CometExchange #24 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #25 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/explain.txt new file mode 100644 index 0000000000..888fbc116c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/explain.txt @@ -0,0 +1,147 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * Project (27) + +- * Filter (26) + +- Window (25) + +- * ColumnarToRow (24) + +- CometSort (23) + +- CometExchange (22) + +- CometHashAggregate (21) + +- CometExchange (20) + +- CometHashAggregate (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : +- CometBroadcastExchange (11) + : +- CometProject (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Arguments: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(2) CometFilter +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books ,Electronics ,Sports ) AND i_class#3 IN (computers ,stereo ,football )) OR (i_category#4 IN (Men ,Jewelry ,Women ) AND i_class#3 IN (shirts ,birdal ,dresses ))) AND isnotnull(i_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(4) CometFilter +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(6) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Right output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [i_item_sk#1], [ss_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8], [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_year#10, d_moy#11] + +(9) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((isnotnull(d_year#10) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(10) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_moy#11], [d_date_sk#9, d_moy#11] + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#9, d_moy#11] +Arguments: [d_date_sk#9, d_moy#11] + +(12) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Right output [2]: [d_date_sk#9, d_moy#11] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(13) CometProject +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#9, d_moy#11] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11], [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(15) CometFilter +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) + +(16) CometBroadcastExchange +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(17) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Right output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [ss_store_sk#6], [s_store_sk#12], Inner, BuildRight + +(18) CometProject +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14], [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] + +(19) CometHashAggregate +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] + +(20) CometExchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#15] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#15] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] + +(22) CometExchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(23) CometSort +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] +Arguments: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17], [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST] + +(24) ColumnarToRow [codegen id : 1] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] + +(25) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#18], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] + +(26) Filter [codegen id : 2] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17, avg_monthly_sales#18] +Condition : CASE WHEN NOT (avg_monthly_sales#18 = 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#18)) / avg_monthly_sales#18) > 0.1000000000000000) END + +(27) Project [codegen id : 2] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, avg_monthly_sales#18] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17, avg_monthly_sales#18] + +(28) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, avg_monthly_sales#18] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#18) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, avg_monthly_sales#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/simplified.txt new file mode 100644 index 0000000000..69ace610ec --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] + WholeStageCodegen (2) + Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #1 + CometHashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 + CometHashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum,ss_sales_price] + CometProject [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_moy] + CometProject [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_class,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_moy] #4 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ebc6f6214a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/explain.txt @@ -0,0 +1,160 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * Project (27) + +- * Filter (26) + +- Window (25) + +- * ColumnarToRow (24) + +- CometSort (23) + +- CometExchange (22) + +- CometHashAggregate (21) + +- CometExchange (20) + +- CometHashAggregate (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.item (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.store_sales (3) + : +- CometBroadcastExchange (11) + : +- CometProject (10) + : +- CometFilter (9) + : +- CometScan parquet spark_catalog.default.date_dim (8) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometScan parquet spark_catalog.default.store (14) + + +(1) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(In(i_category, [Books ,Electronics ,Sports ]),In(i_class, [computers ,football ,stereo ])),And(In(i_category, [Jewelry ,Men ,Women ]),In(i_class, [birdal ,dresses ,shirts ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books ,Electronics ,Sports ) AND i_class#3 IN (computers ,stereo ,football )) OR (i_category#4 IN (Men ,Jewelry ,Women ) AND i_class#3 IN (shirts ,birdal ,dresses ))) AND isnotnull(i_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(6) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Right output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [i_item_sk#1], [ss_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8], [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((isnotnull(d_year#10) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(10) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_moy#11], [d_date_sk#9, d_moy#11] + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#9, d_moy#11] +Arguments: [d_date_sk#9, d_moy#11] + +(12) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Right output [2]: [d_date_sk#9, d_moy#11] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(13) CometProject +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#9, d_moy#11] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11], [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] + +(14) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(15) CometFilter +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) + +(16) CometBroadcastExchange +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(17) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Right output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [ss_store_sk#6], [s_store_sk#12], Inner, BuildRight + +(18) CometProject +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14], [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] + +(19) CometHashAggregate +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] + +(20) CometExchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#15] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#15] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] + +(22) CometExchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(23) CometSort +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] +Arguments: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17], [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST] + +(24) ColumnarToRow [codegen id : 1] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] + +(25) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#18], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] + +(26) Filter [codegen id : 2] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17, avg_monthly_sales#18] +Condition : CASE WHEN NOT (avg_monthly_sales#18 = 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#18)) / avg_monthly_sales#18) > 0.1000000000000000) END + +(27) Project [codegen id : 2] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, avg_monthly_sales#18] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, _w0#17, avg_monthly_sales#18] + +(28) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, avg_monthly_sales#18] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#18) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#16, avg_monthly_sales#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7905678064 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q89.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] + WholeStageCodegen (2) + Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #1 + CometHashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 + CometHashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum,ss_sales_price] + CometProject [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_moy] + CometProject [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_class,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_moy] #4 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/explain.txt new file mode 100644 index 0000000000..41ec0fbbe3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/explain.txt @@ -0,0 +1,265 @@ +== Physical Plan == +* ColumnarToRow (4) ++- CometProject (3) + +- CometFilter (2) + +- CometNativeScan: `spark_catalog`.`default`.`reason` (1) + + +(1) CometNativeScan: `spark_catalog`.`default`.`reason` +Output [1]: [r_reason_sk#1] +Arguments: [r_reason_sk#1] + +(2) CometFilter +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(3) CometProject +Input [1]: [r_reason_sk#1] +Arguments: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6], [CASE WHEN (Subquery scalar-subquery#7, [id=#8].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_net_paid) END AS bucket1#2, CASE WHEN (Subquery scalar-subquery#9, [id=#10].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_net_paid) END AS bucket2#3, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket3#4, CASE WHEN (Subquery scalar-subquery#13, [id=#14].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#13, [id=#14].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#13, [id=#14].avg(ss_net_paid) END AS bucket4#5, CASE WHEN (Subquery scalar-subquery#15, [id=#16].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#15, [id=#16].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#15, [id=#16].avg(ss_net_paid) END AS bucket5#6] + +(4) ColumnarToRow [codegen id : 1] +Input [5]: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +* ColumnarToRow (12) ++- CometProject (11) + +- CometHashAggregate (10) + +- CometExchange (9) + +- CometHashAggregate (8) + +- CometProject (7) + +- CometFilter (6) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Arguments: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] + +(6) CometFilter +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) + +(7) CometProject +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Arguments: [ss_ext_discount_amt#18, ss_net_paid#19], [ss_ext_discount_amt#18, ss_net_paid#19] + +(8) CometHashAggregate +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] + +(9) CometExchange +Input [5]: [count#21, sum#22, count#23, sum#24, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [5]: [count#21, sum#22, count#23, sum#24, count#25] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] + +(11) CometProject +Input [3]: [count(1)#26, avg(ss_ext_discount_amt)#27, avg(ss_net_paid)#28] +Arguments: [mergedValue#29], [named_struct(count(1), count(1)#26, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#27, avg(ss_net_paid), avg(ss_net_paid)#28) AS mergedValue#29] + +(12) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#29] + +Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:3 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:4 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* ColumnarToRow (20) ++- CometProject (19) + +- CometHashAggregate (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometProject (15) + +- CometFilter (14) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (13) + + +(13) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] +Arguments: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] + +(14) CometFilter +Input [4]: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] +Condition : ((isnotnull(ss_quantity#30) AND (ss_quantity#30 >= 21)) AND (ss_quantity#30 <= 40)) + +(15) CometProject +Input [4]: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] +Arguments: [ss_ext_discount_amt#31, ss_net_paid#32], [ss_ext_discount_amt#31, ss_net_paid#32] + +(16) CometHashAggregate +Input [2]: [ss_ext_discount_amt#31, ss_net_paid#32] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#31)), partial_avg(UnscaledValue(ss_net_paid#32))] + +(17) CometExchange +Input [5]: [count#34, sum#35, count#36, sum#37, count#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometHashAggregate +Input [5]: [count#34, sum#35, count#36, sum#37, count#38] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#31)), avg(UnscaledValue(ss_net_paid#32))] + +(19) CometProject +Input [3]: [count(1)#39, avg(ss_ext_discount_amt)#40, avg(ss_net_paid)#41] +Arguments: [mergedValue#42], [named_struct(count(1), count(1)#39, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#40, avg(ss_net_paid), avg(ss_net_paid)#41) AS mergedValue#42] + +(20) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#42] + +Subquery:5 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + +Subquery:6 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + +Subquery:7 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* ColumnarToRow (28) ++- CometProject (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometFilter (22) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (21) + + +(21) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] +Arguments: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] + +(22) CometFilter +Input [4]: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] +Condition : ((isnotnull(ss_quantity#43) AND (ss_quantity#43 >= 41)) AND (ss_quantity#43 <= 60)) + +(23) CometProject +Input [4]: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] +Arguments: [ss_ext_discount_amt#44, ss_net_paid#45], [ss_ext_discount_amt#44, ss_net_paid#45] + +(24) CometHashAggregate +Input [2]: [ss_ext_discount_amt#44, ss_net_paid#45] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#44)), partial_avg(UnscaledValue(ss_net_paid#45))] + +(25) CometExchange +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(26) CometHashAggregate +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#44)), avg(UnscaledValue(ss_net_paid#45))] + +(27) CometProject +Input [3]: [count(1)#52, avg(ss_ext_discount_amt)#53, avg(ss_net_paid)#54] +Arguments: [mergedValue#55], [named_struct(count(1), count(1)#52, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#53, avg(ss_net_paid), avg(ss_net_paid)#54) AS mergedValue#55] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#55] + +Subquery:8 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:9 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:10 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* ColumnarToRow (36) ++- CometProject (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (29) + + +(29) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] +Arguments: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] + +(30) CometFilter +Input [4]: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] +Condition : ((isnotnull(ss_quantity#56) AND (ss_quantity#56 >= 61)) AND (ss_quantity#56 <= 80)) + +(31) CometProject +Input [4]: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] +Arguments: [ss_ext_discount_amt#57, ss_net_paid#58], [ss_ext_discount_amt#57, ss_net_paid#58] + +(32) CometHashAggregate +Input [2]: [ss_ext_discount_amt#57, ss_net_paid#58] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#57)), partial_avg(UnscaledValue(ss_net_paid#58))] + +(33) CometExchange +Input [5]: [count#60, sum#61, count#62, sum#63, count#64] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(34) CometHashAggregate +Input [5]: [count#60, sum#61, count#62, sum#63, count#64] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#57)), avg(UnscaledValue(ss_net_paid#58))] + +(35) CometProject +Input [3]: [count(1)#65, avg(ss_ext_discount_amt)#66, avg(ss_net_paid)#67] +Arguments: [mergedValue#68], [named_struct(count(1), count(1)#65, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#66, avg(ss_net_paid), avg(ss_net_paid)#67) AS mergedValue#68] + +(36) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#68] + +Subquery:11 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + +Subquery:12 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + +Subquery:13 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#15, [id=#16] +* ColumnarToRow (44) ++- CometProject (43) + +- CometHashAggregate (42) + +- CometExchange (41) + +- CometHashAggregate (40) + +- CometProject (39) + +- CometFilter (38) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (37) + + +(37) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] +Arguments: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] + +(38) CometFilter +Input [4]: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] +Condition : ((isnotnull(ss_quantity#69) AND (ss_quantity#69 >= 81)) AND (ss_quantity#69 <= 100)) + +(39) CometProject +Input [4]: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] +Arguments: [ss_ext_discount_amt#70, ss_net_paid#71], [ss_ext_discount_amt#70, ss_net_paid#71] + +(40) CometHashAggregate +Input [2]: [ss_ext_discount_amt#70, ss_net_paid#71] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#70)), partial_avg(UnscaledValue(ss_net_paid#71))] + +(41) CometExchange +Input [5]: [count#73, sum#74, count#75, sum#76, count#77] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(42) CometHashAggregate +Input [5]: [count#73, sum#74, count#75, sum#76, count#77] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#70)), avg(UnscaledValue(ss_net_paid#71))] + +(43) CometProject +Input [3]: [count(1)#78, avg(ss_ext_discount_amt)#79, avg(ss_net_paid)#80] +Arguments: [mergedValue#81], [named_struct(count(1), count(1)#78, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#79, avg(ss_net_paid), avg(ss_net_paid)#80) AS mergedValue#81] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#81] + +Subquery:14 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] + +Subquery:15 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/simplified.txt new file mode 100644 index 0000000000..750b92722f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [bucket1,bucket2,bucket3,bucket4,bucket5] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #1 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #2 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #3 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #4 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #5 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + CometFilter [r_reason_sk] + CometNativeScan: `spark_catalog`.`default`.`reason` [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8261f96537 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/explain.txt @@ -0,0 +1,283 @@ +== Physical Plan == +* ColumnarToRow (4) ++- CometProject (3) + +- CometFilter (2) + +- CometScan parquet spark_catalog.default.reason (1) + + +(1) CometScan parquet spark_catalog.default.reason +Output [1]: [r_reason_sk#1] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] +ReadSchema: struct + +(2) CometFilter +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(3) CometProject +Input [1]: [r_reason_sk#1] +Arguments: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6], [CASE WHEN (Subquery scalar-subquery#7, [id=#8].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_net_paid) END AS bucket1#2, CASE WHEN (Subquery scalar-subquery#9, [id=#10].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_net_paid) END AS bucket2#3, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket3#4, CASE WHEN (Subquery scalar-subquery#13, [id=#14].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#13, [id=#14].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#13, [id=#14].avg(ss_net_paid) END AS bucket4#5, CASE WHEN (Subquery scalar-subquery#15, [id=#16].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#15, [id=#16].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#15, [id=#16].avg(ss_net_paid) END AS bucket5#6] + +(4) ColumnarToRow [codegen id : 1] +Input [5]: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +* ColumnarToRow (12) ++- CometProject (11) + +- CometHashAggregate (10) + +- CometExchange (9) + +- CometHashAggregate (8) + +- CometProject (7) + +- CometFilter (6) + +- CometScan parquet spark_catalog.default.store_sales (5) + + +(5) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(6) CometFilter +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) + +(7) CometProject +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Arguments: [ss_ext_discount_amt#18, ss_net_paid#19], [ss_ext_discount_amt#18, ss_net_paid#19] + +(8) CometHashAggregate +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] + +(9) CometExchange +Input [5]: [count#21, sum#22, count#23, sum#24, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [5]: [count#21, sum#22, count#23, sum#24, count#25] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] + +(11) CometProject +Input [3]: [count(1)#26, avg(ss_ext_discount_amt)#27, avg(ss_net_paid)#28] +Arguments: [mergedValue#29], [named_struct(count(1), count(1)#26, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#27, avg(ss_net_paid), avg(ss_net_paid)#28) AS mergedValue#29] + +(12) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#29] + +Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:3 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:4 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* ColumnarToRow (20) ++- CometProject (19) + +- CometHashAggregate (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometProject (15) + +- CometFilter (14) + +- CometScan parquet spark_catalog.default.store_sales (13) + + +(13) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(14) CometFilter +Input [4]: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] +Condition : ((isnotnull(ss_quantity#30) AND (ss_quantity#30 >= 21)) AND (ss_quantity#30 <= 40)) + +(15) CometProject +Input [4]: [ss_quantity#30, ss_ext_discount_amt#31, ss_net_paid#32, ss_sold_date_sk#33] +Arguments: [ss_ext_discount_amt#31, ss_net_paid#32], [ss_ext_discount_amt#31, ss_net_paid#32] + +(16) CometHashAggregate +Input [2]: [ss_ext_discount_amt#31, ss_net_paid#32] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#31)), partial_avg(UnscaledValue(ss_net_paid#32))] + +(17) CometExchange +Input [5]: [count#34, sum#35, count#36, sum#37, count#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometHashAggregate +Input [5]: [count#34, sum#35, count#36, sum#37, count#38] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#31)), avg(UnscaledValue(ss_net_paid#32))] + +(19) CometProject +Input [3]: [count(1)#39, avg(ss_ext_discount_amt)#40, avg(ss_net_paid)#41] +Arguments: [mergedValue#42], [named_struct(count(1), count(1)#39, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#40, avg(ss_net_paid), avg(ss_net_paid)#41) AS mergedValue#42] + +(20) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#42] + +Subquery:5 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + +Subquery:6 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + +Subquery:7 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* ColumnarToRow (28) ++- CometProject (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometFilter (22) + +- CometScan parquet spark_catalog.default.store_sales (21) + + +(21) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(22) CometFilter +Input [4]: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] +Condition : ((isnotnull(ss_quantity#43) AND (ss_quantity#43 >= 41)) AND (ss_quantity#43 <= 60)) + +(23) CometProject +Input [4]: [ss_quantity#43, ss_ext_discount_amt#44, ss_net_paid#45, ss_sold_date_sk#46] +Arguments: [ss_ext_discount_amt#44, ss_net_paid#45], [ss_ext_discount_amt#44, ss_net_paid#45] + +(24) CometHashAggregate +Input [2]: [ss_ext_discount_amt#44, ss_net_paid#45] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#44)), partial_avg(UnscaledValue(ss_net_paid#45))] + +(25) CometExchange +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(26) CometHashAggregate +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#44)), avg(UnscaledValue(ss_net_paid#45))] + +(27) CometProject +Input [3]: [count(1)#52, avg(ss_ext_discount_amt)#53, avg(ss_net_paid)#54] +Arguments: [mergedValue#55], [named_struct(count(1), count(1)#52, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#53, avg(ss_net_paid), avg(ss_net_paid)#54) AS mergedValue#55] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#55] + +Subquery:8 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:9 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:10 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* ColumnarToRow (36) ++- CometProject (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometProject (31) + +- CometFilter (30) + +- CometScan parquet spark_catalog.default.store_sales (29) + + +(29) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(30) CometFilter +Input [4]: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] +Condition : ((isnotnull(ss_quantity#56) AND (ss_quantity#56 >= 61)) AND (ss_quantity#56 <= 80)) + +(31) CometProject +Input [4]: [ss_quantity#56, ss_ext_discount_amt#57, ss_net_paid#58, ss_sold_date_sk#59] +Arguments: [ss_ext_discount_amt#57, ss_net_paid#58], [ss_ext_discount_amt#57, ss_net_paid#58] + +(32) CometHashAggregate +Input [2]: [ss_ext_discount_amt#57, ss_net_paid#58] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#57)), partial_avg(UnscaledValue(ss_net_paid#58))] + +(33) CometExchange +Input [5]: [count#60, sum#61, count#62, sum#63, count#64] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(34) CometHashAggregate +Input [5]: [count#60, sum#61, count#62, sum#63, count#64] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#57)), avg(UnscaledValue(ss_net_paid#58))] + +(35) CometProject +Input [3]: [count(1)#65, avg(ss_ext_discount_amt)#66, avg(ss_net_paid)#67] +Arguments: [mergedValue#68], [named_struct(count(1), count(1)#65, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#66, avg(ss_net_paid), avg(ss_net_paid)#67) AS mergedValue#68] + +(36) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#68] + +Subquery:11 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + +Subquery:12 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + +Subquery:13 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#15, [id=#16] +* ColumnarToRow (44) ++- CometProject (43) + +- CometHashAggregate (42) + +- CometExchange (41) + +- CometHashAggregate (40) + +- CometProject (39) + +- CometFilter (38) + +- CometScan parquet spark_catalog.default.store_sales (37) + + +(37) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(38) CometFilter +Input [4]: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] +Condition : ((isnotnull(ss_quantity#69) AND (ss_quantity#69 >= 81)) AND (ss_quantity#69 <= 100)) + +(39) CometProject +Input [4]: [ss_quantity#69, ss_ext_discount_amt#70, ss_net_paid#71, ss_sold_date_sk#72] +Arguments: [ss_ext_discount_amt#70, ss_net_paid#71], [ss_ext_discount_amt#70, ss_net_paid#71] + +(40) CometHashAggregate +Input [2]: [ss_ext_discount_amt#70, ss_net_paid#71] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#70)), partial_avg(UnscaledValue(ss_net_paid#71))] + +(41) CometExchange +Input [5]: [count#73, sum#74, count#75, sum#76, count#77] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(42) CometHashAggregate +Input [5]: [count#73, sum#74, count#75, sum#76, count#77] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#70)), avg(UnscaledValue(ss_net_paid#71))] + +(43) CometProject +Input [3]: [count(1)#78, avg(ss_ext_discount_amt)#79, avg(ss_net_paid)#80] +Arguments: [mergedValue#81], [named_struct(count(1), count(1)#78, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#79, avg(ss_net_paid), avg(ss_net_paid)#80) AS mergedValue#81] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [mergedValue#81] + +Subquery:14 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] + +Subquery:15 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..cfeb9ebe74 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [bucket1,bucket2,bucket3,bucket4,bucket5] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #1 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #2 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #3 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #4 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #5 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + CometFilter [r_reason_sk] + CometScan parquet spark_catalog.default.reason [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/explain.txt new file mode 100644 index 0000000000..6390d2d5eb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/explain.txt @@ -0,0 +1,242 @@ +== Physical Plan == +* Project (47) ++- * BroadcastNestedLoopJoin Inner BuildRight (46) + :- * ColumnarToRow (25) + : +- CometHashAggregate (24) + : +- CometExchange (23) + : +- CometHashAggregate (22) + : +- CometProject (21) + : +- CometBroadcastHashJoin (20) + : :- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometProject (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (4) + : : +- CometBroadcastExchange (13) + : : +- CometProject (12) + : : +- CometFilter (11) + : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (10) + : +- CometBroadcastExchange (19) + : +- CometProject (18) + : +- CometFilter (17) + : +- CometNativeScan: `spark_catalog`.`default`.`web_page` (16) + +- BroadcastExchange (45) + +- * ColumnarToRow (44) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometProject (31) + : : +- CometBroadcastHashJoin (30) + : : :- CometProject (28) + : : : +- CometFilter (27) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (26) + : : +- ReusedExchange (29) + : +- CometBroadcastExchange (35) + : +- CometProject (34) + : +- CometFilter (33) + : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (32) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Arguments: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(3) CometProject +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Arguments: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3], [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5, hd_dep_count#6] + +(5) CometFilter +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 6)) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ws_ship_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#5] +Arguments: [ws_sold_time_sk#1, ws_web_page_sk#3], [ws_sold_time_sk#1, ws_web_page_sk#3] + +(10) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [2]: [t_time_sk#7, t_hour#8] +Arguments: [t_time_sk#7, t_hour#8] + +(11) CometFilter +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(12) CometProject +Input [2]: [t_time_sk#7, t_hour#8] +Arguments: [t_time_sk#7], [t_time_sk#7] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: [t_time_sk#7] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Right output [1]: [t_time_sk#7] +Arguments: [ws_sold_time_sk#1], [t_time_sk#7], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] +Arguments: [ws_web_page_sk#3], [ws_web_page_sk#3] + +(16) CometNativeScan: `spark_catalog`.`default`.`web_page` +Output [2]: [wp_web_page_sk#9, wp_char_count#10] +Arguments: [wp_web_page_sk#9, wp_char_count#10] + +(17) CometFilter +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Condition : (((isnotnull(wp_char_count#10) AND (wp_char_count#10 >= 5000)) AND (wp_char_count#10 <= 5200)) AND isnotnull(wp_web_page_sk#9)) + +(18) CometProject +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Arguments: [wp_web_page_sk#9], [wp_web_page_sk#9] + +(19) CometBroadcastExchange +Input [1]: [wp_web_page_sk#9] +Arguments: [wp_web_page_sk#9] + +(20) CometBroadcastHashJoin +Left output [1]: [ws_web_page_sk#3] +Right output [1]: [wp_web_page_sk#9] +Arguments: [ws_web_page_sk#3], [wp_web_page_sk#9], Inner, BuildRight + +(21) CometProject +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#9] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#11] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 2] +Input [1]: [amc#12] + +(26) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Arguments: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] + +(27) CometFilter +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Condition : ((isnotnull(ws_ship_hdemo_sk#14) AND isnotnull(ws_sold_time_sk#13)) AND isnotnull(ws_web_page_sk#15)) + +(28) CometProject +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Arguments: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15], [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15] + +(29) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#17] + +(30) CometBroadcastHashJoin +Left output [3]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15] +Right output [1]: [hd_demo_sk#17] +Arguments: [ws_ship_hdemo_sk#14], [hd_demo_sk#17], Inner, BuildRight + +(31) CometProject +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, hd_demo_sk#17] +Arguments: [ws_sold_time_sk#13, ws_web_page_sk#15], [ws_sold_time_sk#13, ws_web_page_sk#15] + +(32) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [2]: [t_time_sk#18, t_hour#19] +Arguments: [t_time_sk#18, t_hour#19] + +(33) CometFilter +Input [2]: [t_time_sk#18, t_hour#19] +Condition : (((isnotnull(t_hour#19) AND (t_hour#19 >= 19)) AND (t_hour#19 <= 20)) AND isnotnull(t_time_sk#18)) + +(34) CometProject +Input [2]: [t_time_sk#18, t_hour#19] +Arguments: [t_time_sk#18], [t_time_sk#18] + +(35) CometBroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: [t_time_sk#18] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_sold_time_sk#13, ws_web_page_sk#15] +Right output [1]: [t_time_sk#18] +Arguments: [ws_sold_time_sk#13], [t_time_sk#18], Inner, BuildRight + +(37) CometProject +Input [3]: [ws_sold_time_sk#13, ws_web_page_sk#15, t_time_sk#18] +Arguments: [ws_web_page_sk#15], [ws_web_page_sk#15] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [wp_web_page_sk#20] + +(39) CometBroadcastHashJoin +Left output [1]: [ws_web_page_sk#15] +Right output [1]: [wp_web_page_sk#20] +Arguments: [ws_web_page_sk#15], [wp_web_page_sk#20], Inner, BuildRight + +(40) CometProject +Input [2]: [ws_web_page_sk#15, wp_web_page_sk#20] + +(41) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(42) CometExchange +Input [1]: [count#21] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(43) CometHashAggregate +Input [1]: [count#21] +Keys: [] +Functions [1]: [count(1)] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [pmc#22] + +(45) BroadcastExchange +Input [1]: [pmc#22] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(46) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 2] +Output [1]: [(cast(amc#12 as decimal(15,4)) / cast(pmc#22 as decimal(15,4))) AS am_pm_ratio#23] +Input [2]: [amc#12, pmc#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a746bdf6f4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_datafusion/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen (2) + Project [amc,pmc] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [amc,count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + CometProject [ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_web_page_sk,t_time_sk] + CometProject [ws_sold_time_sk,ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,hd_demo_sk] + CometProject [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + CometFilter [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour] + CometBroadcastExchange [wp_web_page_sk] #4 + CometProject [wp_web_page_sk] + CometFilter [wp_web_page_sk,wp_char_count] + CometNativeScan: `spark_catalog`.`default`.`web_page` [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [pmc,count,count(1)] + CometExchange #6 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + CometProject [ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_web_page_sk,t_time_sk] + CometProject [ws_sold_time_sk,ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,hd_demo_sk] + CometProject [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + CometFilter [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #7 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour] + ReusedExchange [wp_web_page_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..a6ec6f4b96 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +* Project (47) ++- * BroadcastNestedLoopJoin Inner BuildRight (46) + :- * ColumnarToRow (25) + : +- CometHashAggregate (24) + : +- CometExchange (23) + : +- CometHashAggregate (22) + : +- CometProject (21) + : +- CometBroadcastHashJoin (20) + : :- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometProject (3) + : : : : +- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometScan parquet spark_catalog.default.household_demographics (4) + : : +- CometBroadcastExchange (13) + : : +- CometProject (12) + : : +- CometFilter (11) + : : +- CometScan parquet spark_catalog.default.time_dim (10) + : +- CometBroadcastExchange (19) + : +- CometProject (18) + : +- CometFilter (17) + : +- CometScan parquet spark_catalog.default.web_page (16) + +- BroadcastExchange (45) + +- * ColumnarToRow (44) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometProject (31) + : : +- CometBroadcastHashJoin (30) + : : :- CometProject (28) + : : : +- CometFilter (27) + : : : +- CometScan parquet spark_catalog.default.web_sales (26) + : : +- ReusedExchange (29) + : +- CometBroadcastExchange (35) + : +- CometProject (34) + : +- CometFilter (33) + : +- CometScan parquet spark_catalog.default.time_dim (32) + +- ReusedExchange (38) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(3) CometProject +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Arguments: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3], [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(4) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) CometFilter +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 6)) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ws_ship_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#5] +Arguments: [ws_sold_time_sk#1, ws_web_page_sk#3], [ws_sold_time_sk#1, ws_web_page_sk#3] + +(10) CometScan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] +ReadSchema: struct + +(11) CometFilter +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(12) CometProject +Input [2]: [t_time_sk#7, t_hour#8] +Arguments: [t_time_sk#7], [t_time_sk#7] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: [t_time_sk#7] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Right output [1]: [t_time_sk#7] +Arguments: [ws_sold_time_sk#1], [t_time_sk#7], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] +Arguments: [ws_web_page_sk#3], [ws_web_page_sk#3] + +(16) CometScan parquet spark_catalog.default.web_page +Output [2]: [wp_web_page_sk#9, wp_char_count#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Condition : (((isnotnull(wp_char_count#10) AND (wp_char_count#10 >= 5000)) AND (wp_char_count#10 <= 5200)) AND isnotnull(wp_web_page_sk#9)) + +(18) CometProject +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Arguments: [wp_web_page_sk#9], [wp_web_page_sk#9] + +(19) CometBroadcastExchange +Input [1]: [wp_web_page_sk#9] +Arguments: [wp_web_page_sk#9] + +(20) CometBroadcastHashJoin +Left output [1]: [ws_web_page_sk#3] +Right output [1]: [wp_web_page_sk#9] +Arguments: [ws_web_page_sk#3], [wp_web_page_sk#9], Inner, BuildRight + +(21) CometProject +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#9] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#11] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 2] +Input [1]: [amc#12] + +(26) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(27) CometFilter +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Condition : ((isnotnull(ws_ship_hdemo_sk#14) AND isnotnull(ws_sold_time_sk#13)) AND isnotnull(ws_web_page_sk#15)) + +(28) CometProject +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Arguments: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15], [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15] + +(29) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#17] + +(30) CometBroadcastHashJoin +Left output [3]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15] +Right output [1]: [hd_demo_sk#17] +Arguments: [ws_ship_hdemo_sk#14], [hd_demo_sk#17], Inner, BuildRight + +(31) CometProject +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, hd_demo_sk#17] +Arguments: [ws_sold_time_sk#13, ws_web_page_sk#15], [ws_sold_time_sk#13, ws_web_page_sk#15] + +(32) CometScan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#18, t_hour#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] +ReadSchema: struct + +(33) CometFilter +Input [2]: [t_time_sk#18, t_hour#19] +Condition : (((isnotnull(t_hour#19) AND (t_hour#19 >= 19)) AND (t_hour#19 <= 20)) AND isnotnull(t_time_sk#18)) + +(34) CometProject +Input [2]: [t_time_sk#18, t_hour#19] +Arguments: [t_time_sk#18], [t_time_sk#18] + +(35) CometBroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: [t_time_sk#18] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_sold_time_sk#13, ws_web_page_sk#15] +Right output [1]: [t_time_sk#18] +Arguments: [ws_sold_time_sk#13], [t_time_sk#18], Inner, BuildRight + +(37) CometProject +Input [3]: [ws_sold_time_sk#13, ws_web_page_sk#15, t_time_sk#18] +Arguments: [ws_web_page_sk#15], [ws_web_page_sk#15] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [wp_web_page_sk#20] + +(39) CometBroadcastHashJoin +Left output [1]: [ws_web_page_sk#15] +Right output [1]: [wp_web_page_sk#20] +Arguments: [ws_web_page_sk#15], [wp_web_page_sk#20], Inner, BuildRight + +(40) CometProject +Input [2]: [ws_web_page_sk#15, wp_web_page_sk#20] + +(41) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(42) CometExchange +Input [1]: [count#21] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(43) CometHashAggregate +Input [1]: [count#21] +Keys: [] +Functions [1]: [count(1)] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [pmc#22] + +(45) BroadcastExchange +Input [1]: [pmc#22] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(46) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 2] +Output [1]: [(cast(amc#12 as decimal(15,4)) / cast(pmc#22 as decimal(15,4))) AS am_pm_ratio#23] +Input [2]: [amc#12, pmc#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..95fd73d86e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q90.native_iceberg_compat/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen (2) + Project [amc,pmc] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [amc,count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + CometProject [ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_web_page_sk,t_time_sk] + CometProject [ws_sold_time_sk,ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,hd_demo_sk] + CometProject [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + CometFilter [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour] + CometBroadcastExchange [wp_web_page_sk] #4 + CometProject [wp_web_page_sk] + CometFilter [wp_web_page_sk,wp_char_count] + CometScan parquet spark_catalog.default.web_page [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [pmc,count,count(1)] + CometExchange #6 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + CometProject [ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_web_page_sk,t_time_sk] + CometProject [ws_sold_time_sk,ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,hd_demo_sk] + CometProject [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + CometFilter [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #7 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour] + ReusedExchange [wp_web_page_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/explain.txt new file mode 100644 index 0000000000..0277b481b8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/explain.txt @@ -0,0 +1,215 @@ +== Physical Plan == +* ColumnarToRow (41) ++- CometSort (40) + +- CometColumnarExchange (39) + +- CometHashAggregate (38) + +- CometExchange (37) + +- CometHashAggregate (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (29) + : +- CometBroadcastHashJoin (28) + : :- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (13) + : : : : +- CometBroadcastHashJoin (12) + : : : : :- CometProject (7) + : : : : : +- CometBroadcastHashJoin (6) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`call_center` (1) + : : : : : +- CometBroadcastExchange (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (3) + : : : : +- CometBroadcastExchange (11) + : : : : +- CometProject (10) + : : : : +- CometFilter (9) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : : +- CometBroadcastExchange (16) + : : : +- CometFilter (15) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (14) + : : +- CometBroadcastExchange (22) + : : +- CometProject (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (19) + : +- CometBroadcastExchange (27) + : +- CometFilter (26) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (25) + +- CometBroadcastExchange (33) + +- CometProject (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Arguments: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] + +(2) CometFilter +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(4) CometFilter +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Condition : (isnotnull(cr_call_center_sk#6) AND isnotnull(cr_returning_customer_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(6) CometBroadcastHashJoin +Left output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Right output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cc_call_center_sk#1], [cr_call_center_sk#6], Inner, BuildRight + +(7) CometProject +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_year#10, d_moy#11] + +(9) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 1998)) AND (d_moy#11 = 11)) AND isnotnull(d_date_sk#9)) + +(10) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(12) CometBroadcastHashJoin +Left output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [cr_returned_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(13) CometProject +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#9] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] + +(14) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(15) CometFilter +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Condition : (((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#15)) AND isnotnull(c_current_cdemo_sk#13)) AND isnotnull(c_current_hdemo_sk#14)) + +(16) CometBroadcastExchange +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(17) CometBroadcastHashJoin +Left output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] +Right output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [cr_returning_customer_sk#5], [c_customer_sk#12], Inner, BuildRight + +(18) CometProject +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#16, ca_gmt_offset#17] +Arguments: [ca_address_sk#16, ca_gmt_offset#17] + +(20) CometFilter +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Condition : ((isnotnull(ca_gmt_offset#17) AND (ca_gmt_offset#17 = -7.00)) AND isnotnull(ca_address_sk#16)) + +(21) CometProject +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(22) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(23) CometBroadcastHashJoin +Left output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Right output [1]: [ca_address_sk#16] +Arguments: [c_current_addr_sk#15], [ca_address_sk#16], Inner, BuildRight + +(24) CometProject +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15, ca_address_sk#16] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] + +(25) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(26) CometFilter +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Unknown )) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = Advanced Degree ))) AND isnotnull(cd_demo_sk#18)) + +(27) CometBroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(28) CometBroadcastHashJoin +Left output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] +Right output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [c_current_cdemo_sk#13], [cd_demo_sk#18], Inner, BuildRight + +(29) CometProject +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] + +(30) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#21, hd_buy_potential#22] +Arguments: [hd_demo_sk#21, hd_buy_potential#22] + +(31) CometFilter +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Condition : ((isnotnull(hd_buy_potential#22) AND StartsWith(hd_buy_potential#22, Unknown)) AND isnotnull(hd_demo_sk#21)) + +(32) CometProject +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Arguments: [hd_demo_sk#21], [hd_demo_sk#21] + +(33) CometBroadcastExchange +Input [1]: [hd_demo_sk#21] +Arguments: [hd_demo_sk#21] + +(34) CometBroadcastHashJoin +Left output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] +Right output [1]: [hd_demo_sk#21] +Arguments: [c_current_hdemo_sk#14], [hd_demo_sk#21], Inner, BuildRight + +(35) CometProject +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20, hd_demo_sk#21] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] + +(36) CometHashAggregate +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#7))] + +(37) CometExchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#23] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(38) CometHashAggregate +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#23] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [sum(UnscaledValue(cr_net_loss#7))] + +(39) CometColumnarExchange +Input [4]: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27] +Arguments: rangepartitioning(Returns_Loss#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(40) CometSort +Input [4]: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27] +Arguments: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27], [Returns_Loss#27 DESC NULLS LAST] + +(41) ColumnarToRow [codegen id : 1] +Input [4]: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1cf9256a1a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_datafusion/simplified.txt @@ -0,0 +1,43 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [Call_Center,Call_Center_Name,Manager,Returns_Loss] + CometColumnarExchange [Returns_Loss] #1 + CometHashAggregate [Call_Center,Call_Center_Name,Manager,Returns_Loss,cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum,sum(UnscaledValue(cr_net_loss))] + CometExchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 + CometHashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum,cr_net_loss] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status,hd_demo_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,ca_address_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk] + CometBroadcastHashJoin [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometFilter [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + CometBroadcastExchange [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] #3 + CometFilter [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] #5 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #7 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [hd_demo_sk] #8 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2451027478 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/explain.txt @@ -0,0 +1,237 @@ +== Physical Plan == +* ColumnarToRow (41) ++- CometSort (40) + +- CometColumnarExchange (39) + +- CometHashAggregate (38) + +- CometExchange (37) + +- CometHashAggregate (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (29) + : +- CometBroadcastHashJoin (28) + : :- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (13) + : : : : +- CometBroadcastHashJoin (12) + : : : : :- CometProject (7) + : : : : : +- CometBroadcastHashJoin (6) + : : : : : :- CometFilter (2) + : : : : : : +- CometScan parquet spark_catalog.default.call_center (1) + : : : : : +- CometBroadcastExchange (5) + : : : : : +- CometFilter (4) + : : : : : +- CometScan parquet spark_catalog.default.catalog_returns (3) + : : : : +- CometBroadcastExchange (11) + : : : : +- CometProject (10) + : : : : +- CometFilter (9) + : : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : : +- CometBroadcastExchange (16) + : : : +- CometFilter (15) + : : : +- CometScan parquet spark_catalog.default.customer (14) + : : +- CometBroadcastExchange (22) + : : +- CometProject (21) + : : +- CometFilter (20) + : : +- CometScan parquet spark_catalog.default.customer_address (19) + : +- CometBroadcastExchange (27) + : +- CometFilter (26) + : +- CometScan parquet spark_catalog.default.customer_demographics (25) + +- CometBroadcastExchange (33) + +- CometProject (32) + +- CometFilter (31) + +- CometScan parquet spark_catalog.default.household_demographics (30) + + +(1) CometScan parquet spark_catalog.default.call_center +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(3) CometScan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#8)] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Condition : (isnotnull(cr_call_center_sk#6) AND isnotnull(cr_returning_customer_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(6) CometBroadcastHashJoin +Left output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Right output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cc_call_center_sk#1], [cr_call_center_sk#6], Inner, BuildRight + +(7) CometProject +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 1998)) AND (d_moy#11 = 11)) AND isnotnull(d_date_sk#9)) + +(10) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(12) CometBroadcastHashJoin +Left output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [cr_returned_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(13) CometProject +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#9] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] + +(14) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(15) CometFilter +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Condition : (((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#15)) AND isnotnull(c_current_cdemo_sk#13)) AND isnotnull(c_current_hdemo_sk#14)) + +(16) CometBroadcastExchange +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(17) CometBroadcastHashJoin +Left output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] +Right output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [cr_returning_customer_sk#5], [c_customer_sk#12], Inner, BuildRight + +(18) CometProject +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(19) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_gmt_offset#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(20) CometFilter +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Condition : ((isnotnull(ca_gmt_offset#17) AND (ca_gmt_offset#17 = -7.00)) AND isnotnull(ca_address_sk#16)) + +(21) CometProject +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(22) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(23) CometBroadcastHashJoin +Left output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Right output [1]: [ca_address_sk#16] +Arguments: [c_current_addr_sk#15], [ca_address_sk#16], Inner, BuildRight + +(24) CometProject +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15, ca_address_sk#16] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] + +(25) CometScan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown )),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree ))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(26) CometFilter +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Unknown )) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = Advanced Degree ))) AND isnotnull(cd_demo_sk#18)) + +(27) CometBroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(28) CometBroadcastHashJoin +Left output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] +Right output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [c_current_cdemo_sk#13], [cd_demo_sk#18], Inner, BuildRight + +(29) CometProject +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] + +(30) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#21, hd_buy_potential#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(31) CometFilter +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Condition : ((isnotnull(hd_buy_potential#22) AND StartsWith(hd_buy_potential#22, Unknown)) AND isnotnull(hd_demo_sk#21)) + +(32) CometProject +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Arguments: [hd_demo_sk#21], [hd_demo_sk#21] + +(33) CometBroadcastExchange +Input [1]: [hd_demo_sk#21] +Arguments: [hd_demo_sk#21] + +(34) CometBroadcastHashJoin +Left output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] +Right output [1]: [hd_demo_sk#21] +Arguments: [c_current_hdemo_sk#14], [hd_demo_sk#21], Inner, BuildRight + +(35) CometProject +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20, hd_demo_sk#21] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] + +(36) CometHashAggregate +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#7))] + +(37) CometExchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#23] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(38) CometHashAggregate +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#23] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [sum(UnscaledValue(cr_net_loss#7))] + +(39) CometColumnarExchange +Input [4]: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27] +Arguments: rangepartitioning(Returns_Loss#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(40) CometSort +Input [4]: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27] +Arguments: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27], [Returns_Loss#27 DESC NULLS LAST] + +(41) ColumnarToRow [codegen id : 1] +Input [4]: [Call_Center#24, Call_Center_Name#25, Manager#26, Returns_Loss#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c6a24dd149 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q91.native_iceberg_compat/simplified.txt @@ -0,0 +1,43 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [Call_Center,Call_Center_Name,Manager,Returns_Loss] + CometColumnarExchange [Returns_Loss] #1 + CometHashAggregate [Call_Center,Call_Center_Name,Manager,Returns_Loss,cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum,sum(UnscaledValue(cr_net_loss))] + CometExchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 + CometHashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum,cr_net_loss] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status,hd_demo_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,ca_address_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk] + CometBroadcastHashJoin [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometFilter [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + CometScan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + CometBroadcastExchange [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] #3 + CometFilter [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] #5 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #7 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [hd_demo_sk] #8 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/explain.txt new file mode 100644 index 0000000000..8acc3929c9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/explain.txt @@ -0,0 +1,159 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometHashAggregate (29) + +- CometExchange (28) + +- CometHashAggregate (27) + +- CometProject (26) + +- CometBroadcastHashJoin (25) + :- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometHashAggregate (19) + : +- CometExchange (18) + : +- CometHashAggregate (17) + : +- CometProject (16) + : +- CometBroadcastHashJoin (15) + : :- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (9) + : +- CometBroadcastExchange (14) + : +- CometProject (13) + : +- CometFilter (12) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (11) + +- ReusedExchange (24) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Condition : (isnotnull(ws_item_sk#1) AND isnotnull(ws_ext_discount_amt#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4, i_manufact_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(5) CometProject +Input [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4], [i_item_sk#4] + +(6) CometBroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: [i_item_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Right output [1]: [i_item_sk#4] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Arguments: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4], [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Arguments: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] + +(10) CometFilter +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Condition : isnotnull(ws_item_sk#6) + +(11) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(12) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(13) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(14) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(15) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ws_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(16) CometProject +Input [4]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8, d_date_sk#9] +Arguments: [ws_item_sk#6, ws_ext_discount_amt#7], [ws_item_sk#6, ws_ext_discount_amt#7] + +(17) CometHashAggregate +Input [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Keys [1]: [ws_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#7))] + +(18) CometExchange +Input [3]: [ws_item_sk#6, sum#11, count#12] +Arguments: hashpartitioning(ws_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(19) CometHashAggregate +Input [3]: [ws_item_sk#6, sum#11, count#12] +Keys [1]: [ws_item_sk#6] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))] + +(20) CometFilter +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#13) + +(21) CometBroadcastExchange +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Arguments: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] + +(22) CometBroadcastHashJoin +Left output [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Right output [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Arguments: [i_item_sk#4], [ws_item_sk#6], Inner, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#13), BuildRight + +(23) CometProject +Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4, (1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Arguments: [ws_ext_discount_amt#2, ws_sold_date_sk#3], [ws_ext_discount_amt#2, ws_sold_date_sk#3] + +(24) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ws_ext_discount_amt#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#14] +Arguments: [ws_sold_date_sk#3], [d_date_sk#14], Inner, BuildRight + +(26) CometProject +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#14] +Arguments: [ws_ext_discount_amt#2], [ws_ext_discount_amt#2] + +(27) CometHashAggregate +Input [1]: [ws_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#2))] + +(28) CometExchange +Input [1]: [sum#15] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [1]: [sum#15] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))] + +(30) ColumnarToRow [codegen id : 1] +Input [1]: [Excess Discount Amount #16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/simplified.txt new file mode 100644 index 0000000000..08b10d6a88 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] + CometExchange #1 + CometHashAggregate [sum,ws_ext_discount_amt] + CometProject [ws_ext_discount_amt] + CometBroadcastHashJoin [ws_ext_discount_amt,ws_sold_date_sk,d_date_sk] + CometProject [ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastHashJoin [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk,(1.3 * avg(ws_ext_discount_amt)),ws_item_sk] + CometProject [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + CometBroadcastHashJoin [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + CometFilter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk] #2 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_manufact_id] + CometBroadcastExchange [(1.3 * avg(ws_ext_discount_amt)),ws_item_sk] #3 + CometFilter [(1.3 * avg(ws_ext_discount_amt)),ws_item_sk] + CometHashAggregate [(1.3 * avg(ws_ext_discount_amt)),ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] + CometExchange [ws_item_sk] #4 + CometHashAggregate [ws_item_sk,sum,count,ws_ext_discount_amt] + CometProject [ws_item_sk,ws_ext_discount_amt] + CometBroadcastHashJoin [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..75185ea081 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/explain.txt @@ -0,0 +1,173 @@ +== Physical Plan == +* ColumnarToRow (30) ++- CometHashAggregate (29) + +- CometExchange (28) + +- CometHashAggregate (27) + +- CometProject (26) + +- CometBroadcastHashJoin (25) + :- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.item (3) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometHashAggregate (19) + : +- CometExchange (18) + : +- CometHashAggregate (17) + : +- CometProject (16) + : +- CometBroadcastHashJoin (15) + : :- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.web_sales (9) + : +- CometBroadcastExchange (14) + : +- CometProject (13) + : +- CometFilter (12) + : +- CometScan parquet spark_catalog.default.date_dim (11) + +- ReusedExchange (24) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Condition : (isnotnull(ws_item_sk#1) AND isnotnull(ws_ext_discount_amt#2)) + +(3) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(5) CometProject +Input [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4], [i_item_sk#4] + +(6) CometBroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: [i_item_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Right output [1]: [i_item_sk#4] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Arguments: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4], [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] + +(9) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#8)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Condition : isnotnull(ws_item_sk#6) + +(11) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(13) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(14) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(15) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ws_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(16) CometProject +Input [4]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8, d_date_sk#9] +Arguments: [ws_item_sk#6, ws_ext_discount_amt#7], [ws_item_sk#6, ws_ext_discount_amt#7] + +(17) CometHashAggregate +Input [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Keys [1]: [ws_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#7))] + +(18) CometExchange +Input [3]: [ws_item_sk#6, sum#11, count#12] +Arguments: hashpartitioning(ws_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(19) CometHashAggregate +Input [3]: [ws_item_sk#6, sum#11, count#12] +Keys [1]: [ws_item_sk#6] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))] + +(20) CometFilter +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#13) + +(21) CometBroadcastExchange +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Arguments: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] + +(22) CometBroadcastHashJoin +Left output [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Right output [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Arguments: [i_item_sk#4], [ws_item_sk#6], Inner, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#13), BuildRight + +(23) CometProject +Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4, (1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#6] +Arguments: [ws_ext_discount_amt#2, ws_sold_date_sk#3], [ws_ext_discount_amt#2, ws_sold_date_sk#3] + +(24) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ws_ext_discount_amt#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#14] +Arguments: [ws_sold_date_sk#3], [d_date_sk#14], Inner, BuildRight + +(26) CometProject +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#14] +Arguments: [ws_ext_discount_amt#2], [ws_ext_discount_amt#2] + +(27) CometHashAggregate +Input [1]: [ws_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#2))] + +(28) CometExchange +Input [1]: [sum#15] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [1]: [sum#15] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))] + +(30) ColumnarToRow [codegen id : 1] +Input [1]: [Excess Discount Amount #16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..02d526a316 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q92.native_iceberg_compat/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] + CometExchange #1 + CometHashAggregate [sum,ws_ext_discount_amt] + CometProject [ws_ext_discount_amt] + CometBroadcastHashJoin [ws_ext_discount_amt,ws_sold_date_sk,d_date_sk] + CometProject [ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastHashJoin [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk,(1.3 * avg(ws_ext_discount_amt)),ws_item_sk] + CometProject [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + CometBroadcastHashJoin [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + CometFilter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk] #2 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + CometBroadcastExchange [(1.3 * avg(ws_ext_discount_amt)),ws_item_sk] #3 + CometFilter [(1.3 * avg(ws_ext_discount_amt)),ws_item_sk] + CometHashAggregate [(1.3 * avg(ws_ext_discount_amt)),ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] + CometExchange [ws_item_sk] #4 + CometHashAggregate [ws_item_sk,sum,count,ws_ext_discount_amt] + CometProject [ws_item_sk,ws_ext_discount_amt] + CometBroadcastHashJoin [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/explain.txt new file mode 100644 index 0000000000..ad03c691af --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/explain.txt @@ -0,0 +1,116 @@ +== Physical Plan == +* ColumnarToRow (22) ++- CometTakeOrderedAndProject (21) + +- CometHashAggregate (20) + +- CometExchange (19) + +- CometHashAggregate (18) + +- CometProject (17) + +- CometBroadcastHashJoin (16) + :- CometProject (11) + : +- CometSortMergeJoin (10) + : :- CometSort (4) + : : +- CometExchange (3) + : : +- CometProject (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometSort (9) + : +- CometExchange (8) + : +- CometProject (7) + : +- CometFilter (6) + : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + +- CometBroadcastExchange (15) + +- CometProject (14) + +- CometFilter (13) + +- CometNativeScan: `spark_catalog`.`default`.`reason` (12) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] + +(2) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5], [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] + +(3) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#3 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometFilter +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_item_sk#7) AND isnotnull(sr_ticket_number#9)) AND isnotnull(sr_reason_sk#8)) + +(7) CometProject +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10], [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] + +(8) CometExchange +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: hashpartitioning(sr_item_sk#7, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10], [sr_item_sk#7 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Right output [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [ss_item_sk#1, ss_ticket_number#3], [sr_item_sk#7, sr_ticket_number#9], Inner + +(11) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10], [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] + +(12) CometNativeScan: `spark_catalog`.`default`.`reason` +Output [2]: [r_reason_sk#12, r_reason_desc#13] +Arguments: [r_reason_sk#12, r_reason_desc#13] + +(13) CometFilter +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Condition : ((isnotnull(r_reason_desc#13) AND (r_reason_desc#13 = reason 28 )) AND isnotnull(r_reason_sk#12)) + +(14) CometProject +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Arguments: [r_reason_sk#12], [r_reason_sk#12] + +(15) CometBroadcastExchange +Input [1]: [r_reason_sk#12] +Arguments: [r_reason_sk#12] + +(16) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] +Right output [1]: [r_reason_sk#12] +Arguments: [sr_reason_sk#8], [r_reason_sk#12], Inner, BuildRight + +(17) CometProject +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10, r_reason_sk#12] +Arguments: [ss_customer_sk#2, act_sales#14], [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#10) THEN (cast((ss_quantity#4 - sr_return_quantity#10) as decimal(10,0)) * ss_sales_price#5) ELSE (cast(ss_quantity#4 as decimal(10,0)) * ss_sales_price#5) END AS act_sales#14] + +(18) CometHashAggregate +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] + +(19) CometExchange +Input [3]: [ss_customer_sk#2, sum#15, isEmpty#16] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometHashAggregate +Input [3]: [ss_customer_sk#2, sum#15, isEmpty#16] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] + +(21) CometTakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#17] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sumsales#17 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#17]), [ss_customer_sk#2, sumsales#17], 100, [sumsales#17 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#17] + +(22) ColumnarToRow [codegen id : 1] +Input [2]: [ss_customer_sk#2, sumsales#17] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4ede7ca586 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_datafusion/simplified.txt @@ -0,0 +1,24 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [ss_customer_sk,sumsales] + CometHashAggregate [ss_customer_sk,sumsales,sum,isEmpty,sum(act_sales)] + CometExchange [ss_customer_sk] #1 + CometHashAggregate [ss_customer_sk,sum,isEmpty,act_sales] + CometProject [sr_return_quantity,ss_quantity,ss_sales_price] [ss_customer_sk,act_sales] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity,r_reason_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + CometExchange [ss_item_sk,ss_ticket_number] #2 + CometProject [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometExchange [sr_item_sk,sr_ticket_number] #3 + CometProject [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometFilter [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [r_reason_sk] #4 + CometProject [r_reason_sk] + CometFilter [r_reason_sk,r_reason_desc] + CometNativeScan: `spark_catalog`.`default`.`reason` [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..dc64f3c4ce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/explain.txt @@ -0,0 +1,124 @@ +== Physical Plan == +* ColumnarToRow (22) ++- CometTakeOrderedAndProject (21) + +- CometHashAggregate (20) + +- CometExchange (19) + +- CometHashAggregate (18) + +- CometProject (17) + +- CometBroadcastHashJoin (16) + :- CometProject (11) + : +- CometSortMergeJoin (10) + : :- CometSort (4) + : : +- CometExchange (3) + : : +- CometProject (2) + : : +- CometScan parquet spark_catalog.default.store_sales (1) + : +- CometSort (9) + : +- CometExchange (8) + : +- CometProject (7) + : +- CometFilter (6) + : +- CometScan parquet spark_catalog.default.store_returns (5) + +- CometBroadcastExchange (15) + +- CometProject (14) + +- CometFilter (13) + +- CometScan parquet spark_catalog.default.reason (12) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +ReadSchema: struct + +(2) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5], [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] + +(3) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#3 ASC NULLS FIRST] + +(5) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] +ReadSchema: struct + +(6) CometFilter +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_item_sk#7) AND isnotnull(sr_ticket_number#9)) AND isnotnull(sr_reason_sk#8)) + +(7) CometProject +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10], [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] + +(8) CometExchange +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: hashpartitioning(sr_item_sk#7, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10], [sr_item_sk#7 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Right output [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [ss_item_sk#1, ss_ticket_number#3], [sr_item_sk#7, sr_ticket_number#9], Inner + +(11) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10], [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] + +(12) CometScan parquet spark_catalog.default.reason +Output [2]: [r_reason_sk#12, r_reason_desc#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28 ), IsNotNull(r_reason_sk)] +ReadSchema: struct + +(13) CometFilter +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Condition : ((isnotnull(r_reason_desc#13) AND (r_reason_desc#13 = reason 28 )) AND isnotnull(r_reason_sk#12)) + +(14) CometProject +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Arguments: [r_reason_sk#12], [r_reason_sk#12] + +(15) CometBroadcastExchange +Input [1]: [r_reason_sk#12] +Arguments: [r_reason_sk#12] + +(16) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] +Right output [1]: [r_reason_sk#12] +Arguments: [sr_reason_sk#8], [r_reason_sk#12], Inner, BuildRight + +(17) CometProject +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10, r_reason_sk#12] +Arguments: [ss_customer_sk#2, act_sales#14], [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#10) THEN (cast((ss_quantity#4 - sr_return_quantity#10) as decimal(10,0)) * ss_sales_price#5) ELSE (cast(ss_quantity#4 as decimal(10,0)) * ss_sales_price#5) END AS act_sales#14] + +(18) CometHashAggregate +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] + +(19) CometExchange +Input [3]: [ss_customer_sk#2, sum#15, isEmpty#16] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometHashAggregate +Input [3]: [ss_customer_sk#2, sum#15, isEmpty#16] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] + +(21) CometTakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#17] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sumsales#17 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#17]), [ss_customer_sk#2, sumsales#17], 100, [sumsales#17 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#17] + +(22) ColumnarToRow [codegen id : 1] +Input [2]: [ss_customer_sk#2, sumsales#17] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6795d7e399 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q93.native_iceberg_compat/simplified.txt @@ -0,0 +1,24 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [ss_customer_sk,sumsales] + CometHashAggregate [ss_customer_sk,sumsales,sum,isEmpty,sum(act_sales)] + CometExchange [ss_customer_sk] #1 + CometHashAggregate [ss_customer_sk,sum,isEmpty,act_sales] + CometProject [sr_return_quantity,ss_quantity,ss_sales_price] [ss_customer_sk,act_sales] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity,r_reason_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + CometExchange [ss_item_sk,ss_ticket_number] #2 + CometProject [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometExchange [sr_item_sk,sr_ticket_number] #3 + CometProject [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometFilter [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [r_reason_sk] #4 + CometProject [r_reason_sk] + CometFilter [r_reason_sk,r_reason_desc] + CometScan parquet spark_catalog.default.reason [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/explain.txt new file mode 100644 index 0000000000..1eb8827312 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/explain.txt @@ -0,0 +1,217 @@ +== Physical Plan == +* HashAggregate (40) ++- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- * ColumnarToRow (36) + +- CometHashAggregate (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometSortMergeJoin (16) + : : : :- CometProject (11) + : : : : +- CometSortMergeJoin (10) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometSort (9) + : : : : +- CometExchange (8) + : : : : +- CometProject (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (6) + : : : +- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (12) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometFilter (18) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (17) + : +- CometBroadcastExchange (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (23) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`web_site` (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(3) CometProject +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(4) CometExchange +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_order_number#5 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Arguments: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] + +(7) CometProject +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Arguments: [ws_warehouse_sk#9, ws_order_number#10], [ws_warehouse_sk#9, ws_order_number#10] + +(8) CometExchange +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_warehouse_sk#9, ws_order_number#10], [ws_order_number#10 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#5], [ws_order_number#10], LeftSemi, NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) + +(11) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(12) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] +Arguments: [wr_order_number#12, wr_returned_date_sk#13] + +(13) CometProject +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] +Arguments: [wr_order_number#12], [wr_order_number#12] + +(14) CometExchange +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(15) CometSort +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12], [wr_order_number#12 ASC NULLS FIRST] + +(16) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [wr_order_number#12] +Arguments: [ws_order_number#5], [wr_order_number#12], LeftAnti + +(17) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14, d_date#15] + +(18) CometFilter +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 1999-02-01)) AND (d_date#15 <= 1999-04-02)) AND isnotnull(d_date_sk#14)) + +(19) CometProject +Input [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14], [d_date_sk#14] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [d_date_sk#14] +Arguments: [ws_ship_date_sk#1], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#14] +Arguments: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(23) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16, ca_state#17] + +(24) CometFilter +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(25) CometProject +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(26) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(27) CometBroadcastHashJoin +Left output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [ca_address_sk#16] +Arguments: [ws_ship_addr_sk#2], [ca_address_sk#16], Inner, BuildRight + +(28) CometProject +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] +Arguments: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(29) CometNativeScan: `spark_catalog`.`default`.`web_site` +Output [2]: [web_site_sk#18, web_company_name#19] +Arguments: [web_site_sk#18, web_company_name#19] + +(30) CometFilter +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri )) AND isnotnull(web_site_sk#18)) + +(31) CometProject +Input [2]: [web_site_sk#18, web_company_name#19] +Arguments: [web_site_sk#18], [web_site_sk#18] + +(32) CometBroadcastExchange +Input [1]: [web_site_sk#18] +Arguments: [web_site_sk#18] + +(33) CometBroadcastHashJoin +Left output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [web_site_sk#18] +Arguments: [ws_web_site_sk#3], [web_site_sk#18], Inner, BuildRight + +(34) CometProject +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] +Arguments: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(35) CometHashAggregate +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] + +(36) ColumnarToRow [codegen id : 1] +Input [3]: [ws_order_number#5, sum#20, sum#21] + +(37) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, sum#20, sum#21] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] +Results [3]: [ws_order_number#5, sum#20, sum#21] + +(38) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, sum#20, sum#21] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#24] +Results [3]: [sum#20, sum#21, count#25] + +(39) Exchange +Input [3]: [sum#20, sum#21, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bafb48b0bd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [ws_order_number,sum,sum,ws_ext_ship_cost,ws_net_profit] + CometProject [ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,web_site_sk] + CometProject [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ca_address_sk] + CometProject [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,d_date_sk] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,wr_order_number] + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_warehouse_sk] + CometSort [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometExchange [ws_order_number] #2 + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometFilter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometSort [ws_warehouse_sk,ws_order_number] + CometExchange [ws_order_number] #3 + CometProject [ws_warehouse_sk,ws_order_number] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometSort [wr_order_number] + CometExchange [wr_order_number] #4 + CometProject [wr_order_number] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_order_number,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [web_site_sk] #7 + CometProject [web_site_sk] + CometFilter [web_site_sk,web_company_name] + CometNativeScan: `spark_catalog`.`default`.`web_site` [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4dd9246cd7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/explain.txt @@ -0,0 +1,233 @@ +== Physical Plan == +* HashAggregate (40) ++- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- * ColumnarToRow (36) + +- CometHashAggregate (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometSortMergeJoin (16) + : : : :- CometProject (11) + : : : : +- CometSortMergeJoin (10) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : : +- CometSort (9) + : : : : +- CometExchange (8) + : : : : +- CometProject (7) + : : : : +- CometScan parquet spark_catalog.default.web_sales (6) + : : : +- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometScan parquet spark_catalog.default.web_returns (12) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometFilter (18) + : : +- CometScan parquet spark_catalog.default.date_dim (17) + : +- CometBroadcastExchange (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometScan parquet spark_catalog.default.customer_address (23) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometScan parquet spark_catalog.default.web_site (29) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(3) CometProject +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(4) CometExchange +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_order_number#5 ASC NULLS FIRST] + +(6) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +ReadSchema: struct + +(7) CometProject +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Arguments: [ws_warehouse_sk#9, ws_order_number#10], [ws_warehouse_sk#9, ws_order_number#10] + +(8) CometExchange +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_warehouse_sk#9, ws_order_number#10], [ws_order_number#10 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#5], [ws_order_number#10], LeftSemi, NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) + +(11) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(12) CometScan parquet spark_catalog.default.web_returns +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +ReadSchema: struct + +(13) CometProject +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] +Arguments: [wr_order_number#12], [wr_order_number#12] + +(14) CometExchange +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(15) CometSort +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12], [wr_order_number#12 ASC NULLS FIRST] + +(16) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [wr_order_number#12] +Arguments: [ws_order_number#5], [wr_order_number#12], LeftAnti + +(17) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#14, d_date#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) CometFilter +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 1999-02-01)) AND (d_date#15 <= 1999-04-02)) AND isnotnull(d_date_sk#14)) + +(19) CometProject +Input [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14], [d_date_sk#14] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [d_date_sk#14] +Arguments: [ws_ship_date_sk#1], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#14] +Arguments: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(23) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) CometFilter +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(25) CometProject +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(26) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(27) CometBroadcastHashJoin +Left output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [ca_address_sk#16] +Arguments: [ws_ship_addr_sk#2], [ca_address_sk#16], Inner, BuildRight + +(28) CometProject +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] +Arguments: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(29) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#18, web_company_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri )) AND isnotnull(web_site_sk#18)) + +(31) CometProject +Input [2]: [web_site_sk#18, web_company_name#19] +Arguments: [web_site_sk#18], [web_site_sk#18] + +(32) CometBroadcastExchange +Input [1]: [web_site_sk#18] +Arguments: [web_site_sk#18] + +(33) CometBroadcastHashJoin +Left output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [web_site_sk#18] +Arguments: [ws_web_site_sk#3], [web_site_sk#18], Inner, BuildRight + +(34) CometProject +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] +Arguments: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(35) CometHashAggregate +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] + +(36) ColumnarToRow [codegen id : 1] +Input [3]: [ws_order_number#5, sum#20, sum#21] + +(37) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, sum#20, sum#21] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] +Results [3]: [ws_order_number#5, sum#20, sum#21] + +(38) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, sum#20, sum#21] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#24] +Results [3]: [sum#20, sum#21, count#25] + +(39) Exchange +Input [3]: [sum#20, sum#21, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 2] +Input [3]: [sum#20, sum#21, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..601f577da9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q94.native_iceberg_compat/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [ws_order_number,sum,sum,ws_ext_ship_cost,ws_net_profit] + CometProject [ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,web_site_sk] + CometProject [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ca_address_sk] + CometProject [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,d_date_sk] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,wr_order_number] + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_warehouse_sk] + CometSort [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometExchange [ws_order_number] #2 + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometFilter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometSort [ws_warehouse_sk,ws_order_number] + CometExchange [ws_order_number] #3 + CometProject [ws_warehouse_sk,ws_order_number] + CometScan parquet spark_catalog.default.web_sales [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometSort [wr_order_number] + CometExchange [wr_order_number] #4 + CometProject [wr_order_number] + CometScan parquet spark_catalog.default.web_returns [wr_order_number,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [web_site_sk] #7 + CometProject [web_site_sk] + CometFilter [web_site_sk,web_company_name] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/explain.txt new file mode 100644 index 0000000000..2c87988976 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/explain.txt @@ -0,0 +1,282 @@ +== Physical Plan == +* HashAggregate (53) ++- Exchange (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * ColumnarToRow (49) + +- CometHashAggregate (48) + +- CometProject (47) + +- CometBroadcastHashJoin (46) + :- CometProject (41) + : +- CometBroadcastHashJoin (40) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometSortMergeJoin (29) + : : : :- CometSortMergeJoin (15) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometProject (14) + : : : : +- CometSortMergeJoin (13) + : : : : :- CometSort (10) + : : : : : +- CometExchange (9) + : : : : : +- CometProject (8) + : : : : : +- CometFilter (7) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (6) + : : : : +- CometSort (12) + : : : : +- ReusedExchange (11) + : : : +- CometProject (28) + : : : +- CometSortMergeJoin (27) + : : : :- CometSort (20) + : : : : +- CometExchange (19) + : : : : +- CometProject (18) + : : : : +- CometFilter (17) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (16) + : : : +- CometProject (26) + : : : +- CometSortMergeJoin (25) + : : : :- CometSort (22) + : : : : +- ReusedExchange (21) + : : : +- CometSort (24) + : : : +- ReusedExchange (23) + : : +- CometBroadcastExchange (33) + : : +- CometProject (32) + : : +- CometFilter (31) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (30) + : +- CometBroadcastExchange (39) + : +- CometProject (38) + : +- CometFilter (37) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (36) + +- CometBroadcastExchange (45) + +- CometProject (44) + +- CometFilter (43) + +- CometNativeScan: `spark_catalog`.`default`.`web_site` (42) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(3) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(4) CometExchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_order_number#4 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Arguments: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] + +(7) CometFilter +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(8) CometProject +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Arguments: [ws_warehouse_sk#8, ws_order_number#9], [ws_warehouse_sk#8, ws_order_number#9] + +(9) CometExchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_warehouse_sk#8, ws_order_number#9], [ws_order_number#9 ASC NULLS FIRST] + +(11) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(12) CometSort +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_warehouse_sk#11, ws_order_number#12], [ws_order_number#12 ASC NULLS FIRST] + +(13) CometSortMergeJoin +Left output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Right output [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#9], [ws_order_number#12], Inner, NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(14) CometProject +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#9], [ws_order_number#9] + +(15) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [ws_order_number#9] +Arguments: [ws_order_number#4], [ws_order_number#9], LeftSemi + +(16) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] +Arguments: [wr_order_number#13, wr_returned_date_sk#14] + +(17) CometFilter +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) + +(18) CometProject +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Arguments: [wr_order_number#13], [wr_order_number#13] + +(19) CometExchange +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13], [wr_order_number#13 ASC NULLS FIRST] + +(21) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#15, ws_order_number#16] + +(22) CometSort +Input [2]: [ws_warehouse_sk#15, ws_order_number#16] +Arguments: [ws_warehouse_sk#15, ws_order_number#16], [ws_order_number#16 ASC NULLS FIRST] + +(23) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#17, ws_order_number#18] + +(24) CometSort +Input [2]: [ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_warehouse_sk#17, ws_order_number#18], [ws_order_number#18 ASC NULLS FIRST] + +(25) CometSortMergeJoin +Left output [2]: [ws_warehouse_sk#15, ws_order_number#16] +Right output [2]: [ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_order_number#16], [ws_order_number#18], Inner, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#17) + +(26) CometProject +Input [4]: [ws_warehouse_sk#15, ws_order_number#16, ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_order_number#16], [ws_order_number#16] + +(27) CometSortMergeJoin +Left output [1]: [wr_order_number#13] +Right output [1]: [ws_order_number#16] +Arguments: [wr_order_number#13], [ws_order_number#16], Inner + +(28) CometProject +Input [2]: [wr_order_number#13, ws_order_number#16] +Arguments: [wr_order_number#13], [wr_order_number#13] + +(29) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [wr_order_number#13] +Arguments: [ws_order_number#4], [wr_order_number#13], LeftSemi + +(30) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#19, d_date#20] +Arguments: [d_date_sk#19, d_date#20] + +(31) CometFilter +Input [2]: [d_date_sk#19, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-01)) AND (d_date#20 <= 1999-04-02)) AND isnotnull(d_date_sk#19)) + +(32) CometProject +Input [2]: [d_date_sk#19, d_date#20] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(33) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(34) CometBroadcastHashJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [d_date_sk#19] +Arguments: [ws_ship_date_sk#1], [d_date_sk#19], Inner, BuildRight + +(35) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#19] +Arguments: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(36) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21, ca_state#22] + +(37) CometFilter +Input [2]: [ca_address_sk#21, ca_state#22] +Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) + +(38) CometProject +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21], [ca_address_sk#21] + +(39) CometBroadcastExchange +Input [1]: [ca_address_sk#21] +Arguments: [ca_address_sk#21] + +(40) CometBroadcastHashJoin +Left output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [ca_address_sk#21] +Arguments: [ws_ship_addr_sk#2], [ca_address_sk#21], Inner, BuildRight + +(41) CometProject +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] +Arguments: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(42) CometNativeScan: `spark_catalog`.`default`.`web_site` +Output [2]: [web_site_sk#23, web_company_name#24] +Arguments: [web_site_sk#23, web_company_name#24] + +(43) CometFilter +Input [2]: [web_site_sk#23, web_company_name#24] +Condition : ((isnotnull(web_company_name#24) AND (web_company_name#24 = pri )) AND isnotnull(web_site_sk#23)) + +(44) CometProject +Input [2]: [web_site_sk#23, web_company_name#24] +Arguments: [web_site_sk#23], [web_site_sk#23] + +(45) CometBroadcastExchange +Input [1]: [web_site_sk#23] +Arguments: [web_site_sk#23] + +(46) CometBroadcastHashJoin +Left output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [web_site_sk#23] +Arguments: [ws_web_site_sk#3], [web_site_sk#23], Inner, BuildRight + +(47) CometProject +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#23] +Arguments: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(48) CometHashAggregate +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] + +(49) ColumnarToRow [codegen id : 1] +Input [3]: [ws_order_number#4, sum#25, sum#26] + +(50) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, sum#25, sum#26] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] +Results [3]: [ws_order_number#4, sum#25, sum#26] + +(51) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, sum#25, sum#26] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#29] +Results [3]: [sum#25, sum#26, count#30] + +(52) Exchange +Input [3]: [sum#25, sum#26, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(53) HashAggregate [codegen id : 2] +Input [3]: [sum#25, sum#26, count#30] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#29] +Results [3]: [count(ws_order_number#4)#29 AS order count #31, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #32, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #33] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4b35a28ce4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_datafusion/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [ws_order_number,sum,sum,ws_ext_ship_cost,ws_net_profit] + CometProject [ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,web_site_sk] + CometProject [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ca_address_sk] + CometProject [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,d_date_sk] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,wr_order_number] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_order_number] + CometSort [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometExchange [ws_order_number] #2 + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometFilter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometProject [ws_order_number] + CometSortMergeJoin [ws_warehouse_sk,ws_order_number,ws_warehouse_sk,ws_order_number] + CometSort [ws_warehouse_sk,ws_order_number] + CometExchange [ws_order_number] #3 + CometProject [ws_warehouse_sk,ws_order_number] + CometFilter [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometProject [wr_order_number] + CometSortMergeJoin [wr_order_number,ws_order_number] + CometSort [wr_order_number] + CometExchange [wr_order_number] #4 + CometProject [wr_order_number] + CometFilter [wr_order_number,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_order_number,wr_returned_date_sk] + CometProject [ws_order_number] + CometSortMergeJoin [ws_warehouse_sk,ws_order_number,ws_warehouse_sk,ws_order_number] + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [web_site_sk] #7 + CometProject [web_site_sk] + CometFilter [web_site_sk,web_company_name] + CometNativeScan: `spark_catalog`.`default`.`web_site` [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..986abf83ca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/explain.txt @@ -0,0 +1,300 @@ +== Physical Plan == +* HashAggregate (53) ++- Exchange (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * ColumnarToRow (49) + +- CometHashAggregate (48) + +- CometProject (47) + +- CometBroadcastHashJoin (46) + :- CometProject (41) + : +- CometBroadcastHashJoin (40) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometSortMergeJoin (29) + : : : :- CometSortMergeJoin (15) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : : +- CometProject (14) + : : : : +- CometSortMergeJoin (13) + : : : : :- CometSort (10) + : : : : : +- CometExchange (9) + : : : : : +- CometProject (8) + : : : : : +- CometFilter (7) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (6) + : : : : +- CometSort (12) + : : : : +- ReusedExchange (11) + : : : +- CometProject (28) + : : : +- CometSortMergeJoin (27) + : : : :- CometSort (20) + : : : : +- CometExchange (19) + : : : : +- CometProject (18) + : : : : +- CometFilter (17) + : : : : +- CometScan parquet spark_catalog.default.web_returns (16) + : : : +- CometProject (26) + : : : +- CometSortMergeJoin (25) + : : : :- CometSort (22) + : : : : +- ReusedExchange (21) + : : : +- CometSort (24) + : : : +- ReusedExchange (23) + : : +- CometBroadcastExchange (33) + : : +- CometProject (32) + : : +- CometFilter (31) + : : +- CometScan parquet spark_catalog.default.date_dim (30) + : +- CometBroadcastExchange (39) + : +- CometProject (38) + : +- CometFilter (37) + : +- CometScan parquet spark_catalog.default.customer_address (36) + +- CometBroadcastExchange (45) + +- CometProject (44) + +- CometFilter (43) + +- CometScan parquet spark_catalog.default.web_site (42) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(3) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(4) CometExchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_order_number#4 ASC NULLS FIRST] + +(6) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(7) CometFilter +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(8) CometProject +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Arguments: [ws_warehouse_sk#8, ws_order_number#9], [ws_warehouse_sk#8, ws_order_number#9] + +(9) CometExchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_warehouse_sk#8, ws_order_number#9], [ws_order_number#9 ASC NULLS FIRST] + +(11) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(12) CometSort +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_warehouse_sk#11, ws_order_number#12], [ws_order_number#12 ASC NULLS FIRST] + +(13) CometSortMergeJoin +Left output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Right output [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#9], [ws_order_number#12], Inner, NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(14) CometProject +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#9], [ws_order_number#9] + +(15) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [ws_order_number#9] +Arguments: [ws_order_number#4], [ws_order_number#9], LeftSemi + +(16) CometScan parquet spark_catalog.default.web_returns +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) + +(18) CometProject +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Arguments: [wr_order_number#13], [wr_order_number#13] + +(19) CometExchange +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13], [wr_order_number#13 ASC NULLS FIRST] + +(21) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#15, ws_order_number#16] + +(22) CometSort +Input [2]: [ws_warehouse_sk#15, ws_order_number#16] +Arguments: [ws_warehouse_sk#15, ws_order_number#16], [ws_order_number#16 ASC NULLS FIRST] + +(23) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#17, ws_order_number#18] + +(24) CometSort +Input [2]: [ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_warehouse_sk#17, ws_order_number#18], [ws_order_number#18 ASC NULLS FIRST] + +(25) CometSortMergeJoin +Left output [2]: [ws_warehouse_sk#15, ws_order_number#16] +Right output [2]: [ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_order_number#16], [ws_order_number#18], Inner, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#17) + +(26) CometProject +Input [4]: [ws_warehouse_sk#15, ws_order_number#16, ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_order_number#16], [ws_order_number#16] + +(27) CometSortMergeJoin +Left output [1]: [wr_order_number#13] +Right output [1]: [ws_order_number#16] +Arguments: [wr_order_number#13], [ws_order_number#16], Inner + +(28) CometProject +Input [2]: [wr_order_number#13, ws_order_number#16] +Arguments: [wr_order_number#13], [wr_order_number#13] + +(29) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [wr_order_number#13] +Arguments: [ws_order_number#4], [wr_order_number#13], LeftSemi + +(30) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#19, d_date#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) CometFilter +Input [2]: [d_date_sk#19, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-01)) AND (d_date#20 <= 1999-04-02)) AND isnotnull(d_date_sk#19)) + +(32) CometProject +Input [2]: [d_date_sk#19, d_date#20] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(33) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(34) CometBroadcastHashJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [d_date_sk#19] +Arguments: [ws_ship_date_sk#1], [d_date_sk#19], Inner, BuildRight + +(35) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#19] +Arguments: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(36) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#21, ca_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(37) CometFilter +Input [2]: [ca_address_sk#21, ca_state#22] +Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) + +(38) CometProject +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21], [ca_address_sk#21] + +(39) CometBroadcastExchange +Input [1]: [ca_address_sk#21] +Arguments: [ca_address_sk#21] + +(40) CometBroadcastHashJoin +Left output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [ca_address_sk#21] +Arguments: [ws_ship_addr_sk#2], [ca_address_sk#21], Inner, BuildRight + +(41) CometProject +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] +Arguments: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(42) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#23, web_company_name#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] +ReadSchema: struct + +(43) CometFilter +Input [2]: [web_site_sk#23, web_company_name#24] +Condition : ((isnotnull(web_company_name#24) AND (web_company_name#24 = pri )) AND isnotnull(web_site_sk#23)) + +(44) CometProject +Input [2]: [web_site_sk#23, web_company_name#24] +Arguments: [web_site_sk#23], [web_site_sk#23] + +(45) CometBroadcastExchange +Input [1]: [web_site_sk#23] +Arguments: [web_site_sk#23] + +(46) CometBroadcastHashJoin +Left output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [web_site_sk#23] +Arguments: [ws_web_site_sk#3], [web_site_sk#23], Inner, BuildRight + +(47) CometProject +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#23] +Arguments: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(48) CometHashAggregate +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] + +(49) ColumnarToRow [codegen id : 1] +Input [3]: [ws_order_number#4, sum#25, sum#26] + +(50) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, sum#25, sum#26] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] +Results [3]: [ws_order_number#4, sum#25, sum#26] + +(51) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, sum#25, sum#26] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#29] +Results [3]: [sum#25, sum#26, count#30] + +(52) Exchange +Input [3]: [sum#25, sum#26, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(53) HashAggregate [codegen id : 2] +Input [3]: [sum#25, sum#26, count#30] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#29] +Results [3]: [count(ws_order_number#4)#29 AS order count #31, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #32, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #33] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..168f353a7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q95.native_iceberg_compat/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [ws_order_number,sum,sum,ws_ext_ship_cost,ws_net_profit] + CometProject [ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,web_site_sk] + CometProject [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ca_address_sk] + CometProject [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,d_date_sk] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,wr_order_number] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_order_number] + CometSort [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometExchange [ws_order_number] #2 + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometFilter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometProject [ws_order_number] + CometSortMergeJoin [ws_warehouse_sk,ws_order_number,ws_warehouse_sk,ws_order_number] + CometSort [ws_warehouse_sk,ws_order_number] + CometExchange [ws_order_number] #3 + CometProject [ws_warehouse_sk,ws_order_number] + CometFilter [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometProject [wr_order_number] + CometSortMergeJoin [wr_order_number,ws_order_number] + CometSort [wr_order_number] + CometExchange [wr_order_number] #4 + CometProject [wr_order_number] + CometFilter [wr_order_number,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_order_number,wr_returned_date_sk] + CometProject [ws_order_number] + CometSortMergeJoin [ws_warehouse_sk,ws_order_number,ws_warehouse_sk,ws_order_number] + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [web_site_sk] #7 + CometProject [web_site_sk] + CometFilter [web_site_sk,web_company_name] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/explain.txt new file mode 100644 index 0000000000..16424de5f3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/explain.txt @@ -0,0 +1,131 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (10) + +- CometBroadcastExchange (19) + +- CometProject (18) + +- CometFilter (17) + +- CometNativeScan: `spark_catalog`.`default`.`store` (16) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(3) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3], [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5, hd_dep_count#6] + +(5) CometFilter +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 7)) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] +Arguments: [ss_sold_time_sk#1, ss_store_sk#3], [ss_sold_time_sk#1, ss_store_sk#3] + +(10) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Arguments: [t_time_sk#7, t_hour#8, t_minute#9] + +(11) CometFilter +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(12) CometProject +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Arguments: [t_time_sk#7], [t_time_sk#7] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: [t_time_sk#7] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Right output [1]: [t_time_sk#7] +Arguments: [ss_sold_time_sk#1], [t_time_sk#7], Inner, BuildRight + +(15) CometProject +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] +Arguments: [ss_store_sk#3], [ss_store_sk#3] + +(16) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#10, s_store_name#11] +Arguments: [s_store_sk#10, s_store_name#11] + +(17) CometFilter +Input [2]: [s_store_sk#10, s_store_name#11] +Condition : ((isnotnull(s_store_name#11) AND (s_store_name#11 = ese)) AND isnotnull(s_store_sk#10)) + +(18) CometProject +Input [2]: [s_store_sk#10, s_store_name#11] +Arguments: [s_store_sk#10], [s_store_sk#10] + +(19) CometBroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: [s_store_sk#10] + +(20) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#3] +Right output [1]: [s_store_sk#10] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(21) CometProject +Input [2]: [ss_store_sk#3, s_store_sk#10] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#12] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 1] +Input [1]: [count(1)#13] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1996d860c8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_datafusion/simplified.txt @@ -0,0 +1,27 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [count(1),count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d2e63bee25 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/explain.txt @@ -0,0 +1,143 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometScan parquet spark_catalog.default.household_demographics (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometScan parquet spark_catalog.default.time_dim (10) + +- CometBroadcastExchange (19) + +- CometProject (18) + +- CometFilter (17) + +- CometScan parquet spark_catalog.default.store (16) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(3) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3], [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(4) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) CometFilter +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 7)) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] +Arguments: [ss_sold_time_sk#1, ss_store_sk#3], [ss_sold_time_sk#1, ss_store_sk#3] + +(10) CometScan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(11) CometFilter +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(12) CometProject +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Arguments: [t_time_sk#7], [t_time_sk#7] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: [t_time_sk#7] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Right output [1]: [t_time_sk#7] +Arguments: [ss_sold_time_sk#1], [t_time_sk#7], Inner, BuildRight + +(15) CometProject +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] +Arguments: [ss_store_sk#3], [ss_store_sk#3] + +(16) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#10, s_store_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [s_store_sk#10, s_store_name#11] +Condition : ((isnotnull(s_store_name#11) AND (s_store_name#11 = ese)) AND isnotnull(s_store_sk#10)) + +(18) CometProject +Input [2]: [s_store_sk#10, s_store_name#11] +Arguments: [s_store_sk#10], [s_store_sk#10] + +(19) CometBroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: [s_store_sk#10] + +(20) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#3] +Right output [1]: [s_store_sk#10] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(21) CometProject +Input [2]: [ss_store_sk#3, s_store_sk#10] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#12] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 1] +Input [1]: [count(1)#13] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d9a87aa3c4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q96.native_iceberg_compat/simplified.txt @@ -0,0 +1,27 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [count(1),count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometScan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_store_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/explain.txt new file mode 100644 index 0000000000..2c42ae9d7f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/explain.txt @@ -0,0 +1,135 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometSortMergeJoin (20) + :- CometSort (11) + : +- CometHashAggregate (10) + : +- CometExchange (9) + : +- CometHashAggregate (8) + : +- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometProject (4) + : +- CometFilter (3) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (2) + +- CometSort (19) + +- CometHashAggregate (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometProject (15) + +- CometBroadcastHashJoin (14) + :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (12) + +- ReusedExchange (13) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] + +(2) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(3) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(4) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(5) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_item_sk#1, ss_customer_sk#2], [ss_item_sk#1, ss_customer_sk#2] + +(8) CometHashAggregate +Input [2]: [ss_item_sk#1, ss_customer_sk#2] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] + +(9) CometExchange +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] + +(11) CometSort +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6, item_sk#7], [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST] + +(12) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Arguments: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] + +(13) ReusedExchange [Reuses operator id: 5] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [cs_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] +Arguments: [cs_bill_customer_sk#8, cs_item_sk#9], [cs_bill_customer_sk#8, cs_item_sk#9] + +(16) CometHashAggregate +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] + +(17) CometExchange +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometHashAggregate +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] + +(19) CometSort +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12, item_sk#13], [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST] + +(20) CometSortMergeJoin +Left output [2]: [customer_sk#6, item_sk#7] +Right output [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#6, item_sk#7], [customer_sk#12, item_sk#13], FullOuter + +(21) CometProject +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] +Arguments: [customer_sk#6, customer_sk#12], [customer_sk#6, customer_sk#12] + +(22) CometHashAggregate +Input [2]: [customer_sk#6, customer_sk#12] +Keys: [] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] + +(23) CometExchange +Input [3]: [sum#14, sum#15, sum#16] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(24) CometHashAggregate +Input [3]: [sum#14, sum#15, sum#16] +Keys: [] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] + +(25) ColumnarToRow [codegen id : 1] +Input [3]: [store_only#17, catalog_only#18, store_and_catalog#19] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3a2bf7df1d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_datafusion/simplified.txt @@ -0,0 +1,27 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [store_only,catalog_only,store_and_catalog,sum,sum,sum,sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END)] + CometExchange #1 + CometHashAggregate [sum,sum,sum,customer_sk,customer_sk] + CometProject [customer_sk,customer_sk] + CometSortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + CometSort [customer_sk,item_sk] + CometHashAggregate [customer_sk,item_sk,ss_customer_sk,ss_item_sk] + CometExchange [ss_customer_sk,ss_item_sk] #2 + CometHashAggregate [ss_customer_sk,ss_item_sk] + CometProject [ss_item_sk,ss_customer_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometSort [customer_sk,item_sk] + CometHashAggregate [customer_sk,item_sk,cs_bill_customer_sk,cs_item_sk] + CometExchange [cs_bill_customer_sk,cs_item_sk] #4 + CometHashAggregate [cs_bill_customer_sk,cs_item_sk] + CometProject [cs_bill_customer_sk,cs_item_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6b2f37b3fc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/explain.txt @@ -0,0 +1,144 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometSortMergeJoin (20) + :- CometSort (11) + : +- CometHashAggregate (10) + : +- CometExchange (9) + : +- CometHashAggregate (8) + : +- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometScan parquet spark_catalog.default.store_sales (1) + : +- CometBroadcastExchange (5) + : +- CometProject (4) + : +- CometFilter (3) + : +- CometScan parquet spark_catalog.default.date_dim (2) + +- CometSort (19) + +- CometHashAggregate (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometProject (15) + +- CometBroadcastHashJoin (14) + :- CometScan parquet spark_catalog.default.catalog_sales (12) + +- ReusedExchange (13) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +ReadSchema: struct + +(2) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(3) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(4) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(5) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_item_sk#1, ss_customer_sk#2], [ss_item_sk#1, ss_customer_sk#2] + +(8) CometHashAggregate +Input [2]: [ss_item_sk#1, ss_customer_sk#2] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] + +(9) CometExchange +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] + +(11) CometSort +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6, item_sk#7], [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST] + +(12) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +ReadSchema: struct + +(13) ReusedExchange [Reuses operator id: 5] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [cs_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] +Arguments: [cs_bill_customer_sk#8, cs_item_sk#9], [cs_bill_customer_sk#8, cs_item_sk#9] + +(16) CometHashAggregate +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] + +(17) CometExchange +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometHashAggregate +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] + +(19) CometSort +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12, item_sk#13], [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST] + +(20) CometSortMergeJoin +Left output [2]: [customer_sk#6, item_sk#7] +Right output [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#6, item_sk#7], [customer_sk#12, item_sk#13], FullOuter + +(21) CometProject +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] +Arguments: [customer_sk#6, customer_sk#12], [customer_sk#6, customer_sk#12] + +(22) CometHashAggregate +Input [2]: [customer_sk#6, customer_sk#12] +Keys: [] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] + +(23) CometExchange +Input [3]: [sum#14, sum#15, sum#16] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(24) CometHashAggregate +Input [3]: [sum#14, sum#15, sum#16] +Keys: [] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] + +(25) ColumnarToRow [codegen id : 1] +Input [3]: [store_only#17, catalog_only#18, store_and_catalog#19] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bcb24e7423 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q97.native_iceberg_compat/simplified.txt @@ -0,0 +1,27 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [store_only,catalog_only,store_and_catalog,sum,sum,sum,sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END)] + CometExchange #1 + CometHashAggregate [sum,sum,sum,customer_sk,customer_sk] + CometProject [customer_sk,customer_sk] + CometSortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + CometSort [customer_sk,item_sk] + CometHashAggregate [customer_sk,item_sk,ss_customer_sk,ss_item_sk] + CometExchange [ss_customer_sk,ss_item_sk] #2 + CometHashAggregate [ss_customer_sk,ss_item_sk] + CometProject [ss_item_sk,ss_customer_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometSort [customer_sk,item_sk] + CometHashAggregate [customer_sk,item_sk,cs_bill_customer_sk,cs_item_sk] + CometExchange [cs_bill_customer_sk,cs_item_sk] #4 + CometHashAggregate [cs_bill_customer_sk,cs_item_sk] + CometProject [cs_bill_customer_sk,cs_item_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/explain.txt new file mode 100644 index 0000000000..769ec05b5f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/explain.txt @@ -0,0 +1,130 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometProject (24) + +- CometSort (23) + +- CometColumnarExchange (22) + +- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5, _we0#15] + +(22) CometColumnarExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(23) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5], [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST] + +(24) CometProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + +(25) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f4bffe18d8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio,i_item_id] + CometColumnarExchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (2) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id] + CometExchange [i_class] #2 + CometHashAggregate [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #4 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d1b5b4582c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometProject (24) + +- CometSort (23) + +- CometColumnarExchange (22) + +- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.store_sales (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.item (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometScan parquet spark_catalog.default.date_dim (8) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, i_item_id#5, _we0#15] + +(22) CometColumnarExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(23) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5], [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST] + +(24) CometProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + +(25) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..50ed7d5a61 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q98.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio,i_item_id] + CometColumnarExchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (2) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id] + CometExchange [i_class] #2 + CometHashAggregate [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #4 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/explain.txt new file mode 100644 index 0000000000..8c3bfd0df7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometTakeOrderedAndProject (23) + +- CometHashAggregate (22) + +- CometExchange (21) + +- CometHashAggregate (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : : +- ReusedExchange (8) + : +- ReusedExchange (11) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#4) AND isnotnull(cs_ship_mode_sk#3)) AND isnotnull(cs_call_center_sk#2)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(4) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [cs_warehouse_sk#4], [w_warehouse_sk#6], Inner, BuildRight + +(7) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7], [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] + +(8) ReusedExchange [Reuses operator id: 5] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] + +(9) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] +Right output [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [cs_ship_mode_sk#3], [sm_ship_mode_sk#8], Inner, BuildRight + +(10) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9], [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] + +(11) ReusedExchange [Reuses operator id: 5] +Output [2]: [cc_call_center_sk#10, cc_name#11] + +(12) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Right output [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: [cs_call_center_sk#2], [cc_call_center_sk#10], Inner, BuildRight + +(13) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_call_center_sk#10, cc_name#11] +Arguments: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11], [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12, d_month_seq#13] + +(15) CometFilter +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(16) CometProject +Input [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(18) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] +Right output [1]: [d_date_sk#12] +Arguments: [cs_ship_date_sk#1], [d_date_sk#12], Inner, BuildRight + +(19) CometProject +Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11, d_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14], [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] + +(20) CometHashAggregate +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(21) CometExchange +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(23) CometTakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST,sm_type#9 ASC NULLS FIRST,cc_name#11 ASC NULLS FIRST], output=[substr(w_warehouse_name, 1, 20)#20,sm_type#9,cc_name#11,30 days #21,31 - 60 days #22,61 - 90 days #23,91 - 120 days #24,>120 days #25]), [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25], 100, [substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + +(24) ColumnarToRow [codegen id : 1] +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ddd2b3d421 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + CometHashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,_groupingexpression,sum,sum,sum,sum,sum,sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END)] + CometExchange [_groupingexpression,sm_type,cc_name] #1 + CometHashAggregate [_groupingexpression,sm_type,cc_name,sum,sum,sum,sum,sum,cs_ship_date_sk,cs_sold_date_sk] + CometProject [w_warehouse_name] [cs_ship_date_sk,cs_sold_date_sk,sm_type,cc_name,_groupingexpression] + CometBroadcastHashJoin [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name,d_date_sk] + CometProject [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_call_center_sk,cc_name] + CometProject [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name,sm_ship_mode_sk,sm_type] + CometProject [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [sm_ship_mode_sk,sm_type] #2 + ReusedExchange [cc_call_center_sk,cc_name] #2 + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6dfcf8b322 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/explain.txt @@ -0,0 +1,163 @@ +== Physical Plan == +* ColumnarToRow (28) ++- CometTakeOrderedAndProject (27) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.warehouse (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.ship_mode (8) + : +- CometBroadcastExchange (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.call_center (13) + +- CometBroadcastExchange (21) + +- CometProject (20) + +- CometFilter (19) + +- CometScan parquet spark_catalog.default.date_dim (18) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#4) AND isnotnull(cs_ship_mode_sk#3)) AND isnotnull(cs_call_center_sk#2)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [cs_warehouse_sk#4], [w_warehouse_sk#6], Inner, BuildRight + +(7) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7], [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] + +(8) CometScan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_type#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) + +(10) CometBroadcastExchange +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [sm_ship_mode_sk#8, sm_type#9] + +(11) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] +Right output [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [cs_ship_mode_sk#3], [sm_ship_mode_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9], [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] + +(13) CometScan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#10, cc_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [cc_call_center_sk#10, cc_name#11] +Condition : isnotnull(cc_call_center_sk#10) + +(15) CometBroadcastExchange +Input [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: [cc_call_center_sk#10, cc_name#11] + +(16) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Right output [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: [cs_call_center_sk#2], [cc_call_center_sk#10], Inner, BuildRight + +(17) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_call_center_sk#10, cc_name#11] +Arguments: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11], [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] + +(18) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_month_seq#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(20) CometProject +Input [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(21) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(22) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] +Right output [1]: [d_date_sk#12] +Arguments: [cs_ship_date_sk#1], [d_date_sk#12], Inner, BuildRight + +(23) CometProject +Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11, d_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14], [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] + +(24) CometHashAggregate +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(25) CometExchange +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(26) CometHashAggregate +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#15, sum#16, sum#17, sum#18, sum#19] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] + +(27) CometTakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST,sm_type#9 ASC NULLS FIRST,cc_name#11 ASC NULLS FIRST], output=[substr(w_warehouse_name, 1, 20)#20,sm_type#9,cc_name#11,30 days #21,31 - 60 days #22,61 - 90 days #23,91 - 120 days #24,>120 days #25]), [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25], 100, [substr(w_warehouse_name, 1, 20)#20 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + +(28) ColumnarToRow [codegen id : 1] +Input [8]: [substr(w_warehouse_name, 1, 20)#20, sm_type#9, cc_name#11, 30 days #21, 31 - 60 days #22, 61 - 90 days #23, 91 - 120 days #24, >120 days #25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..51599575db --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q99.native_iceberg_compat/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + CometHashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,_groupingexpression,sum,sum,sum,sum,sum,sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END)] + CometExchange [_groupingexpression,sm_type,cc_name] #1 + CometHashAggregate [_groupingexpression,sm_type,cc_name,sum,sum,sum,sum,sum,cs_ship_date_sk,cs_sold_date_sk] + CometProject [w_warehouse_name] [cs_ship_date_sk,cs_sold_date_sk,sm_type,cc_name,_groupingexpression] + CometBroadcastHashJoin [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name,d_date_sk] + CometProject [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_call_center_sk,cc_name] + CometProject [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name,sm_ship_mode_sk,sm_type] + CometProject [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [sm_ship_mode_sk,sm_type] #3 + CometFilter [sm_ship_mode_sk,sm_type] + CometScan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_type] + CometBroadcastExchange [cc_call_center_sk,cc_name] #4 + CometFilter [cc_call_center_sk,cc_name] + CometScan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/explain.txt new file mode 100644 index 0000000000..a1a0e06f4f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/explain.txt @@ -0,0 +1,239 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (13) + : : : +- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * ColumnarToRow (9) + : : : +- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- BroadcastExchange (27) + : : +- * Filter (26) + : : +- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * HashAggregate (22) + : : +- Exchange (21) + : : +- * HashAggregate (20) + : : +- * ColumnarToRow (19) + : : +- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometFilter (15) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (14) + : : +- ReusedExchange (16) + : +- BroadcastExchange (34) + : +- * ColumnarToRow (33) + : +- CometProject (32) + : +- CometFilter (31) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (30) + +- BroadcastExchange (40) + +- * ColumnarToRow (39) + +- CometFilter (38) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (37) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Arguments: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] + +(2) CometFilter +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : (isnotnull(sr_store_sk#2) AND isnotnull(sr_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5, d_year#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [sr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] +Arguments: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3], [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] + +(9) ColumnarToRow [codegen id : 1] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] + +(10) HashAggregate [codegen id : 1] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] + +(11) Exchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(12) HashAggregate [codegen id : 7] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#10, sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] + +(13) Filter [codegen id : 7] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(14) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15, sr_returned_date_sk#16] +Arguments: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15, sr_returned_date_sk#16] + +(15) CometFilter +Input [4]: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_store_sk#14) + +(16) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#17] + +(17) CometBroadcastHashJoin +Left output [4]: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15, sr_returned_date_sk#16] +Right output [1]: [d_date_sk#17] +Arguments: [sr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + +(18) CometProject +Input [5]: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15, sr_returned_date_sk#16, d_date_sk#17] +Arguments: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15], [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15] + +(19) ColumnarToRow [codegen id : 2] +Input [3]: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15] + +(20) HashAggregate [codegen id : 2] +Input [3]: [sr_customer_sk#13, sr_store_sk#14, sr_return_amt#15] +Keys [2]: [sr_customer_sk#13, sr_store_sk#14] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#15))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [sr_customer_sk#13, sr_store_sk#14, sum#19] + +(21) Exchange +Input [3]: [sr_customer_sk#13, sr_store_sk#14, sum#19] +Arguments: hashpartitioning(sr_customer_sk#13, sr_store_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(22) HashAggregate [codegen id : 3] +Input [3]: [sr_customer_sk#13, sr_store_sk#14, sum#19] +Keys [2]: [sr_customer_sk#13, sr_store_sk#14] +Functions [1]: [sum(UnscaledValue(sr_return_amt#15))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#15))#9] +Results [2]: [sr_store_sk#14 AS ctr_store_sk#20, MakeDecimal(sum(UnscaledValue(sr_return_amt#15))#9,17,2) AS ctr_total_return#21] + +(23) HashAggregate [codegen id : 3] +Input [2]: [ctr_store_sk#20, ctr_total_return#21] +Keys [1]: [ctr_store_sk#20] +Functions [1]: [partial_avg(ctr_total_return#21)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ctr_store_sk#20, sum#24, count#25] + +(24) Exchange +Input [3]: [ctr_store_sk#20, sum#24, count#25] +Arguments: hashpartitioning(ctr_store_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(25) HashAggregate [codegen id : 4] +Input [3]: [ctr_store_sk#20, sum#24, count#25] +Keys [1]: [ctr_store_sk#20] +Functions [1]: [avg(ctr_total_return#21)] +Aggregate Attributes [1]: [avg(ctr_total_return#21)#26] +Results [2]: [(avg(ctr_total_return#21)#26 * 1.2) AS (avg(ctr_total_return) * 1.2)#27, ctr_store_sk#20] + +(26) Filter [codegen id : 4] +Input [2]: [(avg(ctr_total_return) * 1.2)#27, ctr_store_sk#20] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#27) + +(27) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#27, ctr_store_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [ctr_store_sk#20] +Join type: Inner +Join condition: (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#27) + +(29) Project [codegen id : 7] +Output [2]: [ctr_customer_sk#10, ctr_store_sk#11] +Input [5]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#27, ctr_store_sk#20] + +(30) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#28, s_state#29] +Arguments: [s_store_sk#28, s_state#29] + +(31) CometFilter +Input [2]: [s_store_sk#28, s_state#29] +Condition : ((isnotnull(s_state#29) AND (s_state#29 = TN)) AND isnotnull(s_store_sk#28)) + +(32) CometProject +Input [2]: [s_store_sk#28, s_state#29] +Arguments: [s_store_sk#28], [s_store_sk#28] + +(33) ColumnarToRow [codegen id : 5] +Input [1]: [s_store_sk#28] + +(34) BroadcastExchange +Input [1]: [s_store_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [s_store_sk#28] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 7] +Output [1]: [ctr_customer_sk#10] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, s_store_sk#28] + +(37) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#30, c_customer_id#31] +Arguments: [c_customer_sk#30, c_customer_id#31] + +(38) CometFilter +Input [2]: [c_customer_sk#30, c_customer_id#31] +Condition : isnotnull(c_customer_sk#30) + +(39) ColumnarToRow [codegen id : 6] +Input [2]: [c_customer_sk#30, c_customer_id#31] + +(40) BroadcastExchange +Input [2]: [c_customer_sk#30, c_customer_id#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_customer_sk#10] +Right keys [1]: [c_customer_sk#30] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [1]: [c_customer_id#31] +Input [3]: [ctr_customer_sk#10, c_customer_sk#30, c_customer_id#31] + +(43) TakeOrderedAndProject +Input [1]: [c_customer_id#31] +Arguments: 100, [c_customer_id#31 ASC NULLS FIRST], [c_customer_id#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2e40cb58cc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q1.native_datafusion/simplified.txt @@ -0,0 +1,60 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen (7) + Project [c_customer_id] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [ctr_store_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_customer_sk,ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #1 + WholeStageCodegen (1) + HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [sr_customer_sk,sr_store_sk,sr_return_amt] + CometBroadcastHashJoin [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk,d_date_sk] + CometFilter [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),sum,count] + InputAdapter + Exchange [ctr_store_sk] #4 + WholeStageCodegen (3) + HashAggregate [ctr_store_sk,ctr_total_return] [sum,count,sum,count] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #5 + WholeStageCodegen (2) + HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [sr_customer_sk,sr_store_sk,sr_return_amt] + CometBroadcastHashJoin [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk,d_date_sk] + CometFilter [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_customer_id] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/explain.txt new file mode 100644 index 0000000000..7c92771145 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/explain.txt @@ -0,0 +1,214 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * BroadcastHashJoin Inner BuildRight (35) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * Project (23) + : : +- * Filter (22) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (21) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (28) + : +- * ColumnarToRow (27) + : +- CometProject (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- BroadcastExchange (34) + +- * ColumnarToRow (33) + +- CometFilter (32) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (31) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#6, ss_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Arguments: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(22) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(23) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#15, ca_county#16] +Arguments: [ca_address_sk#15, ca_county#16] + +(25) CometFilter +Input [2]: [ca_address_sk#15, ca_county#16] +Condition : (ca_county#16 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#15)) + +(26) CometProject +Input [2]: [ca_address_sk#15, ca_county#16] +Arguments: [ca_address_sk#15], [ca_address_sk#15] + +(27) ColumnarToRow [codegen id : 3] +Input [1]: [ca_address_sk#15] + +(28) BroadcastExchange +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#15] + +(31) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Arguments: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] + +(32) CometFilter +Input [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Condition : isnotnull(cd_demo_sk#17) + +(33) ColumnarToRow [codegen id : 4] +Input [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] + +(34) BroadcastExchange +Input [9]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 5] +Output [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] + +(37) HashAggregate [codegen id : 5] +Input [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Keys [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#26] +Results [9]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, count#27] + +(38) Exchange +Input [9]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, count#27] +Arguments: hashpartitioning(cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(39) HashAggregate [codegen id : 6] +Input [9]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25, count#27] +Keys [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, cd_dep_count#23, cd_dep_employed_count#24, cd_dep_college_count#25] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#28] +Results [14]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, count(1)#28 AS cnt1#29, cd_purchase_estimate#21, count(1)#28 AS cnt2#30, cd_credit_rating#22, count(1)#28 AS cnt3#31, cd_dep_count#23, count(1)#28 AS cnt4#32, cd_dep_employed_count#24, count(1)#28 AS cnt5#33, cd_dep_college_count#25, count(1)#28 AS cnt6#34] + +(40) TakeOrderedAndProject +Input [14]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#29, cd_purchase_estimate#21, cnt2#30, cd_credit_rating#22, cnt3#31, cd_dep_count#23, cnt4#32, cd_dep_employed_count#24, cnt5#33, cd_dep_college_count#25, cnt6#34] +Arguments: 100, [cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_education_status#20 ASC NULLS FIRST, cd_purchase_estimate#21 ASC NULLS FIRST, cd_credit_rating#22 ASC NULLS FIRST, cd_dep_count#23 ASC NULLS FIRST, cd_dep_employed_count#24 ASC NULLS FIRST, cd_dep_college_count#25 ASC NULLS FIRST], [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#29, cd_purchase_estimate#21, cnt2#30, cd_credit_rating#22, cnt3#31, cd_dep_count#23, cnt4#32, cd_dep_employed_count#24, cnt5#33, cd_dep_college_count#25, cnt6#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/simplified.txt new file mode 100644 index 0000000000..228afd2bac --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q10.native_datafusion/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (6) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_county] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/explain.txt new file mode 100644 index 0000000000..72f8bb38d1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (66) ++- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Project (34) + : : +- * BroadcastHashJoin Inner BuildRight (33) + : : :- * Filter (17) + : : : +- * HashAggregate (16) + : : : +- Exchange (15) + : : : +- * HashAggregate (14) + : : : +- * ColumnarToRow (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * ColumnarToRow (28) + : : +- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometFilter (19) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (18) + : : : +- ReusedExchange (20) + : : +- CometBroadcastExchange (25) + : : +- CometFilter (24) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (23) + : +- BroadcastExchange (48) + : +- * Filter (47) + : +- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * ColumnarToRow (43) + : +- CometProject (42) + : +- CometBroadcastHashJoin (41) + : :- CometProject (39) + : : +- CometBroadcastHashJoin (38) + : : :- CometFilter (36) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (35) + : : +- ReusedExchange (37) + : +- ReusedExchange (40) + +- BroadcastExchange (63) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * ColumnarToRow (59) + +- CometProject (58) + +- CometBroadcastHashJoin (57) + :- CometProject (55) + : +- CometBroadcastHashJoin (54) + : :- CometFilter (52) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (51) + : +- ReusedExchange (53) + +- ReusedExchange (56) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(4) CometFilter +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(9) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(11) CometBroadcastHashJoin +Left output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#13, d_year#14] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] + +(13) ColumnarToRow [codegen id : 1] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] + +(14) HashAggregate [codegen id : 1] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum#15] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] + +(15) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(16) HashAggregate [codegen id : 8] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17] +Results [2]: [c_customer_id#2 AS customer_id#18, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17,18,2) AS year_total#19] + +(17) Filter [codegen id : 8] +Input [2]: [customer_id#18, year_total#19] +Condition : (isnotnull(year_total#19) AND (year_total#19 > 0.00)) + +(18) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Arguments: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] + +(19) CometFilter +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Condition : (isnotnull(c_customer_sk#20) AND isnotnull(c_customer_id#21)) + +(20) ReusedExchange [Reuses operator id: 5] +Output [4]: [ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(21) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Right output [4]: [ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Arguments: [c_customer_sk#20], [ss_customer_sk#28], Inner, BuildRight + +(22) CometProject +Input [12]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Arguments: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31], [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(23) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#32, d_year#33] +Arguments: [d_date_sk#32, d_year#33] + +(24) CometFilter +Input [2]: [d_date_sk#32, d_year#33] +Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2002)) AND isnotnull(d_date_sk#32)) + +(25) CometBroadcastExchange +Input [2]: [d_date_sk#32, d_year#33] +Arguments: [d_date_sk#32, d_year#33] + +(26) CometBroadcastHashJoin +Left output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Right output [2]: [d_date_sk#32, d_year#33] +Arguments: [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + +(27) CometProject +Input [12]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31, d_date_sk#32, d_year#33] +Arguments: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33], [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] + +(28) ColumnarToRow [codegen id : 2] +Input [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] + +(29) HashAggregate [codegen id : 2] +Input [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum#34] +Results [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] + +(30) Exchange +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Arguments: hashpartitioning(c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(31) HashAggregate [codegen id : 3] +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17] +Results [3]: [c_customer_id#21 AS customer_id#36, c_preferred_cust_flag#24 AS customer_preferred_cust_flag#37, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17,18,2) AS year_total#38] + +(32) BroadcastExchange +Input [3]: [customer_id#36, customer_preferred_cust_flag#37, year_total#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#36] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 8] +Output [4]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38] +Input [5]: [customer_id#18, year_total#19, customer_id#36, customer_preferred_cust_flag#37, year_total#38] + +(35) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Arguments: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] + +(36) CometFilter +Input [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Condition : (isnotnull(c_customer_sk#39) AND isnotnull(c_customer_id#40)) + +(37) ReusedExchange [Reuses operator id: 5] +Output [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] + +(38) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Right output [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Arguments: [c_customer_sk#39], [ws_bill_customer_sk#47], Inner, BuildRight + +(39) CometProject +Input [12]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Arguments: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50], [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] + +(40) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#51, d_year#52] + +(41) CometBroadcastHashJoin +Left output [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Right output [2]: [d_date_sk#51, d_year#52] +Arguments: [ws_sold_date_sk#50], [d_date_sk#51], Inner, BuildRight + +(42) CometProject +Input [12]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] +Arguments: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52], [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] + +(43) ColumnarToRow [codegen id : 4] +Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] + +(44) HashAggregate [codegen id : 4] +Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] +Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))] +Aggregate Attributes [1]: [sum#53] +Results [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] + +(45) Exchange +Input [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(46) HashAggregate [codegen id : 5] +Input [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] +Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))#55] +Results [2]: [c_customer_id#40 AS customer_id#56, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))#55,18,2) AS year_total#57] + +(47) Filter [codegen id : 5] +Input [2]: [customer_id#56, year_total#57] +Condition : (isnotnull(year_total#57) AND (year_total#57 > 0.00)) + +(48) BroadcastExchange +Input [2]: [customer_id#56, year_total#57] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] + +(49) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#56] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 8] +Output [5]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, year_total#57] +Input [6]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, customer_id#56, year_total#57] + +(51) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Arguments: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] + +(52) CometFilter +Input [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Condition : (isnotnull(c_customer_sk#58) AND isnotnull(c_customer_id#59)) + +(53) ReusedExchange [Reuses operator id: 5] +Output [4]: [ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] + +(54) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Right output [4]: [ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] +Arguments: [c_customer_sk#58], [ws_bill_customer_sk#66], Inner, BuildRight + +(55) CometProject +Input [12]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] +Arguments: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69], [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] + +(56) ReusedExchange [Reuses operator id: 25] +Output [2]: [d_date_sk#70, d_year#71] + +(57) CometBroadcastHashJoin +Left output [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] +Right output [2]: [d_date_sk#70, d_year#71] +Arguments: [ws_sold_date_sk#69], [d_date_sk#70], Inner, BuildRight + +(58) CometProject +Input [12]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69, d_date_sk#70, d_year#71] +Arguments: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71], [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] + +(59) ColumnarToRow [codegen id : 6] +Input [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] + +(60) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] +Keys [8]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))] +Aggregate Attributes [1]: [sum#72] +Results [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] + +(61) Exchange +Input [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] +Arguments: hashpartitioning(c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(62) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] +Keys [8]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))#55] +Results [2]: [c_customer_id#59 AS customer_id#74, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))#55,18,2) AS year_total#75] + +(63) BroadcastExchange +Input [2]: [customer_id#74, year_total#75] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=7] + +(64) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#74] +Join type: Inner +Join condition: (CASE WHEN (year_total#57 > 0.00) THEN (year_total#75 / year_total#57) END > CASE WHEN (year_total#19 > 0.00) THEN (year_total#38 / year_total#19) END) + +(65) Project [codegen id : 8] +Output [1]: [customer_preferred_cust_flag#37] +Input [7]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, year_total#57, customer_id#74, year_total#75] + +(66) TakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#37] +Arguments: 100, [customer_preferred_cust_flag#37 ASC NULLS FIRST], [customer_preferred_cust_flag#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/simplified.txt new file mode 100644 index 0000000000..64b628845f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q11.native_datafusion/simplified.txt @@ -0,0 +1,85 @@ +TakeOrderedAndProject [customer_preferred_cust_flag] + WholeStageCodegen (8) + Project [customer_preferred_cust_flag] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + WholeStageCodegen (1) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,customer_preferred_cust_flag,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + WholeStageCodegen (2) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (4) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #2 + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #10 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #2 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/explain.txt new file mode 100644 index 0000000000..1f55fd0951 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/explain.txt @@ -0,0 +1,120 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * Sort (19) + +- Exchange (18) + +- * HashAggregate (17) + +- Exchange (16) + +- * HashAggregate (15) + +- * ColumnarToRow (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ws_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) ColumnarToRow [codegen id : 1] +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(15) HashAggregate [codegen id : 1] +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(16) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(17) HashAggregate [codegen id : 2] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(18) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(19) Sort [codegen id : 3] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(21) Project [codegen id : 4] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(22) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/simplified.txt new file mode 100644 index 0000000000..dc9d0df5f1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q12.native_datafusion/simplified.txt @@ -0,0 +1,30 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (4) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (3) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (2) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (1) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/explain.txt new file mode 100644 index 0000000000..07b08a045c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/explain.txt @@ -0,0 +1,178 @@ +== Physical Plan == +* HashAggregate (33) ++- Exchange (32) + +- * HashAggregate (31) + +- * ColumnarToRow (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometProject (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + : : +- CometBroadcastExchange (17) + : : +- CometProject (16) + : : +- CometFilter (15) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (20) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(2) CometFilter +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_hdemo_sk#2)) AND ((((ss_net_profit#9 >= 100.00) AND (ss_net_profit#9 <= 200.00)) OR ((ss_net_profit#9 >= 150.00) AND (ss_net_profit#9 <= 300.00))) OR ((ss_net_profit#9 >= 50.00) AND (ss_net_profit#9 <= 250.00)))) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) + +(3) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(4) CometFilter +Input [1]: [s_store_sk#11] +Condition : isnotnull(s_store_sk#11) + +(5) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(6) CometBroadcastHashJoin +Left output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#4], [s_store_sk#11], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, s_store_sk#11] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13, ca_country#14] + +(9) CometFilter +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (TX,OH) OR ca_state#13 IN (OR,NM,KY)) OR ca_state#13 IN (VA,TX,MS))) + +(10) CometProject +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13], [ca_address_sk#12, ca_state#13] + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ca_address_sk#12, ca_state#13] + +(12) CometBroadcastHashJoin +Left output [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Right output [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ss_addr_sk#3], [ca_address_sk#12], Inner, ((((ca_state#13 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#13 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#13 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))), BuildRight + +(13) CometProject +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#12, ca_state#13] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(15) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(16) CometProject +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15], [d_date_sk#15] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [7]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] +Right output [1]: [d_date_sk#15] +Arguments: [ss_sold_date_sk#10], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#15] +Arguments: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8], [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(21) CometFilter +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : (isnotnull(cd_demo_sk#17) AND ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) OR ((cd_marital_status#18 = S) AND (cd_education_status#19 = College ))) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )))) + +(22) CometBroadcastExchange +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Right output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [ss_cdemo_sk#1], [cd_demo_sk#17], Inner, ((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))), BuildRight + +(24) CometProject +Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19], [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] + +(25) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [hd_demo_sk#20, hd_dep_count#21] + +(26) CometFilter +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Condition : (isnotnull(hd_demo_sk#20) AND ((hd_dep_count#21 = 3) OR (hd_dep_count#21 = 1))) + +(27) CometBroadcastExchange +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [hd_demo_sk#20, hd_dep_count#21] + +(28) CometBroadcastHashJoin +Left output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] +Right output [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#20], Inner, (((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#21 = 3)) OR (((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#21 = 1))) OR (((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#21 = 1))), BuildRight + +(29) CometProject +Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20, hd_dep_count#21] +Arguments: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8], [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(30) ColumnarToRow [codegen id : 1] +Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] + +(31) HashAggregate [codegen id : 1] +Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Keys: [] +Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] +Aggregate Attributes [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Results [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] + +(32) Exchange +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] + +(33) HashAggregate [codegen id : 2] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Keys: [] +Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] +Aggregate Attributes [4]: [avg(ss_quantity#5)#36, avg(UnscaledValue(ss_ext_sales_price#7))#37, avg(UnscaledValue(ss_ext_wholesale_cost#8))#38, sum(UnscaledValue(ss_ext_wholesale_cost#8))#39] +Results [4]: [avg(ss_quantity#5)#36 AS avg(ss_quantity)#40, cast((avg(UnscaledValue(ss_ext_sales_price#7))#37 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#41, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#38 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#42, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#39,17,2) AS sum(ss_ext_wholesale_cost)#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9905fa96b4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q13.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +WholeStageCodegen (2) + HashAggregate [sum,count,sum,count,sum,count,sum] [avg(ss_quantity),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + CometBroadcastHashJoin [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status,hd_demo_sk,hd_dep_count] + CometProject [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk,d_date_sk] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk,ca_address_sk,ca_state] + CometProject [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk,s_store_sk] + CometFilter [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #5 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [hd_demo_sk,hd_dep_count] #6 + CometFilter [hd_demo_sk,hd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/explain.txt new file mode 100644 index 0000000000..61b469888b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/explain.txt @@ -0,0 +1,487 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * HashAggregate (71) + +- Exchange (70) + +- * HashAggregate (69) + +- * Expand (68) + +- Union (67) + :- * Project (58) + : +- * Filter (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- * ColumnarToRow (53) + : +- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometBroadcastHashJoin (39) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (38) + : : : +- CometProject (37) + : : : +- CometBroadcastHashJoin (36) + : : : :- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (16) + : : : : +- ReusedExchange (27) + : : : +- ReusedExchange (33) + : : +- CometBroadcastExchange (44) + : : +- CometBroadcastHashJoin (43) + : : :- CometFilter (41) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (40) + : : +- ReusedExchange (42) + : +- CometBroadcastExchange (50) + : +- CometProject (49) + : +- CometFilter (48) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (47) + :- * Project (62) + : +- * Filter (61) + : +- * HashAggregate (60) + : +- ReusedExchange (59) + +- * Project (66) + +- * Filter (65) + +- * HashAggregate (64) + +- ReusedExchange (63) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Arguments: [ss_item_sk#9, ss_sold_date_sk#10] + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Arguments: [cs_item_sk#15, cs_sold_date_sk#16] + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21, d_year#22] + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) ReusedExchange [Reuses operator id: 22] +Output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] + +(34) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#27, 0), isnull(i_brand_id#27), coalesce(i_class_id#28, 0), isnull(i_class_id#28), coalesce(i_category_id#29, 0), isnull(i_category_id#29)], LeftSemi, BuildRight + +(35) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(36) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(37) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#30], [i_item_sk#5 AS ss_item_sk#30] + +(38) CometBroadcastExchange +Input [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#1], [ss_item_sk#30], LeftSemi, BuildRight + +(40) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(41) CometFilter +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Condition : isnotnull(i_item_sk#31) + +(42) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#30] + +(43) CometBroadcastHashJoin +Left output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [ss_item_sk#30] +Arguments: [i_item_sk#31], [ss_item_sk#30], LeftSemi, BuildRight + +(44) CometBroadcastExchange +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_item_sk#1], [i_item_sk#31], Inner, BuildRight + +(46) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] + +(47) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#35, d_year#36, d_moy#37] +Arguments: [d_date_sk#35, d_year#36, d_moy#37] + +(48) CometFilter +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2001)) AND (d_moy#37 = 11)) AND isnotnull(d_date_sk#35)) + +(49) CometProject +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Arguments: [d_date_sk#35], [d_date_sk#35] + +(50) CometBroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: [d_date_sk#35] + +(51) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [d_date_sk#35] +Arguments: [ss_sold_date_sk#4], [d_date_sk#35], Inner, BuildRight + +(52) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34, d_date_sk#35] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] + +(53) ColumnarToRow [codegen id : 1] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] + +(54) HashAggregate [codegen id : 1] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#38, isEmpty#39, count#40] +Results [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#41, isEmpty#42, count#43] + +(55) Exchange +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#41, isEmpty#42, count#43] +Arguments: hashpartitioning(i_brand_id#32, i_class_id#33, i_category_id#34, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(56) HashAggregate [codegen id : 2] +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#41, isEmpty#42, count#43] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#44, count(1)#45] +Results [5]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#44 AS sales#46, count(1)#45 AS number_sales#47] + +(57) Filter [codegen id : 2] +Input [5]: [i_brand_id#32, i_class_id#33, i_category_id#34, sales#46, number_sales#47] +Condition : (isnotnull(sales#46) AND (cast(sales#46 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(58) Project [codegen id : 2] +Output [6]: [sales#46, number_sales#47, store AS channel#50, i_brand_id#32 AS i_brand_id#51, i_class_id#33 AS i_class_id#52, i_category_id#34 AS i_category_id#53] +Input [5]: [i_brand_id#32, i_class_id#33, i_category_id#34, sales#46, number_sales#47] + +(59) ReusedExchange [Reuses operator id: 55] +Output [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#57, isEmpty#58, count#59] + +(60) HashAggregate [codegen id : 4] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#57, isEmpty#58, count#59] +Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] +Functions [2]: [sum((cast(cs_quantity#60 as decimal(10,0)) * cs_list_price#61)), count(1)] +Aggregate Attributes [2]: [sum((cast(cs_quantity#60 as decimal(10,0)) * cs_list_price#61))#62, count(1)#63] +Results [5]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum((cast(cs_quantity#60 as decimal(10,0)) * cs_list_price#61))#62 AS sales#64, count(1)#63 AS number_sales#65] + +(61) Filter [codegen id : 4] +Input [5]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#64, number_sales#65] +Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(62) Project [codegen id : 4] +Output [6]: [sales#64, number_sales#65, catalog AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56] +Input [5]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#64, number_sales#65] + +(63) ReusedExchange [Reuses operator id: 55] +Output [6]: [i_brand_id#67, i_class_id#68, i_category_id#69, sum#70, isEmpty#71, count#72] + +(64) HashAggregate [codegen id : 6] +Input [6]: [i_brand_id#67, i_class_id#68, i_category_id#69, sum#70, isEmpty#71, count#72] +Keys [3]: [i_brand_id#67, i_class_id#68, i_category_id#69] +Functions [2]: [sum((cast(ws_quantity#73 as decimal(10,0)) * ws_list_price#74)), count(1)] +Aggregate Attributes [2]: [sum((cast(ws_quantity#73 as decimal(10,0)) * ws_list_price#74))#75, count(1)#76] +Results [5]: [i_brand_id#67, i_class_id#68, i_category_id#69, sum((cast(ws_quantity#73 as decimal(10,0)) * ws_list_price#74))#75 AS sales#77, count(1)#76 AS number_sales#78] + +(65) Filter [codegen id : 6] +Input [5]: [i_brand_id#67, i_class_id#68, i_category_id#69, sales#77, number_sales#78] +Condition : (isnotnull(sales#77) AND (cast(sales#77 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(66) Project [codegen id : 6] +Output [6]: [sales#77, number_sales#78, web AS channel#79, i_brand_id#67, i_class_id#68, i_category_id#69] +Input [5]: [i_brand_id#67, i_class_id#68, i_category_id#69, sales#77, number_sales#78] + +(67) Union + +(68) Expand [codegen id : 7] +Input [6]: [sales#46, number_sales#47, channel#50, i_brand_id#51, i_class_id#52, i_category_id#53] +Arguments: [[sales#46, number_sales#47, channel#50, i_brand_id#51, i_class_id#52, i_category_id#53, 0], [sales#46, number_sales#47, channel#50, i_brand_id#51, i_class_id#52, null, 1], [sales#46, number_sales#47, channel#50, i_brand_id#51, null, null, 3], [sales#46, number_sales#47, channel#50, null, null, null, 7], [sales#46, number_sales#47, null, null, null, null, 15]], [sales#46, number_sales#47, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] + +(69) HashAggregate [codegen id : 7] +Input [7]: [sales#46, number_sales#47, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Functions [2]: [partial_sum(sales#46), partial_sum(number_sales#47)] +Aggregate Attributes [3]: [sum#85, isEmpty#86, sum#87] +Results [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] + +(70) Exchange +Input [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] +Arguments: hashpartitioning(channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(71) HashAggregate [codegen id : 8] +Input [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] +Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Functions [2]: [sum(sales#46), sum(number_sales#47)] +Aggregate Attributes [2]: [sum(sales#46)#91, sum(number_sales#47)#92] +Results [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales#46)#91 AS sum(sales)#93, sum(number_sales#47)#92 AS sum(number_sales)#94] + +(72) TakeOrderedAndProject +Input [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales)#93, sum(number_sales)#94] +Arguments: 100, [channel#80 ASC NULLS FIRST, i_brand_id#81 ASC NULLS FIRST, i_class_id#82 ASC NULLS FIRST, i_category_id#83 ASC NULLS FIRST], [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales)#93, sum(number_sales)#94] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 57 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +* HashAggregate (89) ++- Exchange (88) + +- * HashAggregate (87) + +- * ColumnarToRow (86) + +- CometUnion (85) + :- CometProject (76) + : +- CometBroadcastHashJoin (75) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (73) + : +- ReusedExchange (74) + :- CometProject (80) + : +- CometBroadcastHashJoin (79) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (77) + : +- ReusedExchange (78) + +- CometProject (84) + +- CometBroadcastHashJoin (83) + :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (81) + +- ReusedExchange (82) + + +(73) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97] +Arguments: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97] + +(74) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#98] + +(75) CometBroadcastHashJoin +Left output [3]: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97] +Right output [1]: [d_date_sk#98] +Arguments: [ss_sold_date_sk#97], [d_date_sk#98], Inner, BuildRight + +(76) CometProject +Input [4]: [ss_quantity#95, ss_list_price#96, ss_sold_date_sk#97, d_date_sk#98] +Arguments: [quantity#99, list_price#100], [ss_quantity#95 AS quantity#99, ss_list_price#96 AS list_price#100] + +(77) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103] +Arguments: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103] + +(78) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#104] + +(79) CometBroadcastHashJoin +Left output [3]: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103] +Right output [1]: [d_date_sk#104] +Arguments: [cs_sold_date_sk#103], [d_date_sk#104], Inner, BuildRight + +(80) CometProject +Input [4]: [cs_quantity#101, cs_list_price#102, cs_sold_date_sk#103, d_date_sk#104] +Arguments: [quantity#105, list_price#106], [cs_quantity#101 AS quantity#105, cs_list_price#102 AS list_price#106] + +(81) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109] +Arguments: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109] + +(82) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#110] + +(83) CometBroadcastHashJoin +Left output [3]: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109] +Right output [1]: [d_date_sk#110] +Arguments: [ws_sold_date_sk#109], [d_date_sk#110], Inner, BuildRight + +(84) CometProject +Input [4]: [ws_quantity#107, ws_list_price#108, ws_sold_date_sk#109, d_date_sk#110] +Arguments: [quantity#111, list_price#112], [ws_quantity#107 AS quantity#111, ws_list_price#108 AS list_price#112] + +(85) CometUnion +Child 0 Input [2]: [quantity#99, list_price#100] +Child 1 Input [2]: [quantity#105, list_price#106] +Child 2 Input [2]: [quantity#111, list_price#112] + +(86) ColumnarToRow [codegen id : 1] +Input [2]: [quantity#99, list_price#100] + +(87) HashAggregate [codegen id : 1] +Input [2]: [quantity#99, list_price#100] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#99 as decimal(10,0)) * list_price#100))] +Aggregate Attributes [2]: [sum#113, count#114] +Results [2]: [sum#115, count#116] + +(88) Exchange +Input [2]: [sum#115, count#116] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(89) HashAggregate [codegen id : 2] +Input [2]: [sum#115, count#116] +Keys: [] +Functions [1]: [avg((cast(quantity#99 as decimal(10,0)) * list_price#100))] +Aggregate Attributes [1]: [avg((cast(quantity#99 as decimal(10,0)) * list_price#100))#117] +Results [1]: [avg((cast(quantity#99 as decimal(10,0)) * list_price#100))#117 AS average_sales#118] + +Subquery:2 Hosting operator id = 61 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] + +Subquery:3 Hosting operator id = 65 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a9e4ee92e1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q14a.native_datafusion/simplified.txt @@ -0,0 +1,108 @@ +TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] + WholeStageCodegen (8) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum(sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + Expand [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + InputAdapter + Union + WholeStageCodegen (2) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen (1) + HashAggregate [quantity,list_price] [sum,count,sum,count] + ColumnarToRow + InputAdapter + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #9 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #9 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (1) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #3 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #4 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #5 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #6 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #7 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #9 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk] #9 + ReusedExchange [i_brand_id,i_class_id,i_category_id] #7 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #3 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + WholeStageCodegen (4) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #2 + WholeStageCodegen (6) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/explain.txt new file mode 100644 index 0000000000..1492cbc85d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +TakeOrderedAndProject (23) ++- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * ColumnarToRow (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (12) + : +- CometBroadcastHashJoin (11) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (3) + : +- CometBroadcastExchange (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + +- CometBroadcastExchange (16) + +- CometProject (15) + +- CometFilter (14) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Arguments: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_bill_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#4, c_current_addr_sk#5] + +(4) CometFilter +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#4, c_current_addr_sk#5] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Right output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, c_customer_sk#4, c_current_addr_sk#5] +Arguments: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5], [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(9) CometFilter +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Condition : isnotnull(ca_address_sk#6) + +(10) CometBroadcastExchange +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(11) CometBroadcastHashJoin +Left output [3]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] +Right output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [c_current_addr_sk#5], [ca_address_sk#6], Inner, ((substr(ca_zip#8, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#7 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)), BuildRight + +(12) CometProject +Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5, ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8], [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Arguments: [d_date_sk#9, d_year#10, d_qoy#11] + +(14) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_qoy#11) AND isnotnull(d_year#10)) AND (d_qoy#11 = 2)) AND (d_year#10 = 2001)) AND isnotnull(d_date_sk#9)) + +(15) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(17) CometBroadcastHashJoin +Left output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] +Right output [1]: [d_date_sk#9] +Arguments: [cs_sold_date_sk#3], [d_date_sk#9], Inner, BuildRight + +(18) CometProject +Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8, d_date_sk#9] +Arguments: [cs_sales_price#2, ca_zip#8], [cs_sales_price#2, ca_zip#8] + +(19) ColumnarToRow [codegen id : 1] +Input [2]: [cs_sales_price#2, ca_zip#8] + +(20) HashAggregate [codegen id : 1] +Input [2]: [cs_sales_price#2, ca_zip#8] +Keys [1]: [ca_zip#8] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [2]: [ca_zip#8, sum#13] + +(21) Exchange +Input [2]: [ca_zip#8, sum#13] +Arguments: hashpartitioning(ca_zip#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(22) HashAggregate [codegen id : 2] +Input [2]: [ca_zip#8, sum#13] +Keys [1]: [ca_zip#8] +Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#14] +Results [2]: [ca_zip#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#14,17,2) AS sum(cs_sales_price)#15] + +(23) TakeOrderedAndProject +Input [2]: [ca_zip#8, sum(cs_sales_price)#15] +Arguments: 100, [ca_zip#8 ASC NULLS FIRST], [ca_zip#8, sum(cs_sales_price)#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/simplified.txt new file mode 100644 index 0000000000..366d77ff30 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q15.native_datafusion/simplified.txt @@ -0,0 +1,27 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen (2) + HashAggregate [ca_zip,sum] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen (1) + HashAggregate [ca_zip,cs_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_sales_price,ca_zip] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,ca_zip,d_date_sk] + CometProject [cs_sales_price,cs_sold_date_sk,ca_zip] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,c_current_addr_sk,ca_address_sk,ca_state,ca_zip] + CometProject [cs_sales_price,cs_sold_date_sk,c_current_addr_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,c_customer_sk,c_current_addr_sk] + CometFilter [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_state,ca_zip] #3 + CometFilter [ca_address_sk,ca_state,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_zip] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/explain.txt new file mode 100644 index 0000000000..4552905a94 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/explain.txt @@ -0,0 +1,219 @@ +== Physical Plan == +* HashAggregate (40) ++- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- * HashAggregate (36) + +- * ColumnarToRow (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometSortMergeJoin (16) + : : : :- CometProject (11) + : : : : +- CometSortMergeJoin (10) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometSort (9) + : : : : +- CometExchange (8) + : : : : +- CometProject (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (6) + : : : +- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (12) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometFilter (18) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (17) + : +- CometBroadcastExchange (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (23) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`call_center` (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(3) CometProject +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(4) CometExchange +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_order_number#5 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Arguments: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] + +(7) CometProject +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Arguments: [cs_warehouse_sk#9, cs_order_number#10], [cs_warehouse_sk#9, cs_order_number#10] + +(8) CometExchange +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_warehouse_sk#9, cs_order_number#10], [cs_order_number#10 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#5], [cs_order_number#10], LeftSemi, NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) + +(11) CometProject +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(12) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] +Arguments: [cr_order_number#12, cr_returned_date_sk#13] + +(13) CometProject +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] +Arguments: [cr_order_number#12], [cr_order_number#12] + +(14) CometExchange +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(15) CometSort +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12], [cr_order_number#12 ASC NULLS FIRST] + +(16) CometSortMergeJoin +Left output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [cr_order_number#12] +Arguments: [cs_order_number#5], [cr_order_number#12], LeftAnti + +(17) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14, d_date#15] + +(18) CometFilter +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2002-02-01)) AND (d_date#15 <= 2002-04-02)) AND isnotnull(d_date_sk#14)) + +(19) CometProject +Input [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14], [d_date_sk#14] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [d_date_sk#14] +Arguments: [cs_ship_date_sk#1], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#14] +Arguments: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(23) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16, ca_state#17] + +(24) CometFilter +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(25) CometProject +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(26) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(27) CometBroadcastHashJoin +Left output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [ca_address_sk#16] +Arguments: [cs_ship_addr_sk#2], [ca_address_sk#16], Inner, BuildRight + +(28) CometProject +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] +Arguments: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(29) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [2]: [cc_call_center_sk#18, cc_county#19] +Arguments: [cc_call_center_sk#18, cc_county#19] + +(30) CometFilter +Input [2]: [cc_call_center_sk#18, cc_county#19] +Condition : ((isnotnull(cc_county#19) AND (cc_county#19 = Williamson County)) AND isnotnull(cc_call_center_sk#18)) + +(31) CometProject +Input [2]: [cc_call_center_sk#18, cc_county#19] +Arguments: [cc_call_center_sk#18], [cc_call_center_sk#18] + +(32) CometBroadcastExchange +Input [1]: [cc_call_center_sk#18] +Arguments: [cc_call_center_sk#18] + +(33) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Right output [1]: [cc_call_center_sk#18] +Arguments: [cs_call_center_sk#3], [cc_call_center_sk#18], Inner, BuildRight + +(34) CometProject +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#18] +Arguments: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7], [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(35) ColumnarToRow [codegen id : 1] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(36) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] + +(37) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, sum#22, sum#23] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] + +(38) HashAggregate [codegen id : 1] +Input [3]: [cs_order_number#5, sum#22, sum#23] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] + +(39) Exchange +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 2] +Input [3]: [sum#22, sum#23, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#21,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4cd5e96597 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q16.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + HashAggregate [cs_order_number,cs_ext_ship_cost,cs_net_profit] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cc_call_center_sk] + CometProject [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,ca_address_sk] + CometProject [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometBroadcastHashJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,d_date_sk] + CometSortMergeJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cr_order_number] + CometProject [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometSortMergeJoin [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_warehouse_sk] + CometSort [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometExchange [cs_order_number] #2 + CometProject [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + CometFilter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + CometSort [cs_warehouse_sk,cs_order_number] + CometExchange [cs_order_number] #3 + CometProject [cs_warehouse_sk,cs_order_number] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_warehouse_sk,cs_order_number,cs_sold_date_sk] + CometSort [cr_order_number] + CometExchange [cr_order_number] #4 + CometProject [cr_order_number] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_order_number,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [cc_call_center_sk] #7 + CometProject [cc_call_center_sk] + CometFilter [cc_call_center_sk,cc_county] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_county] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/explain.txt new file mode 100644 index 0000000000..022dab9eb9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/explain.txt @@ -0,0 +1,224 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * ColumnarToRow (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : +- ReusedExchange (25) + : +- CometBroadcastExchange (30) + : +- CometFilter (29) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (28) + +- CometBroadcastExchange (35) + +- CometFilter (34) + +- CometNativeScan: `spark_catalog`.`default`.`item` (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#16, d_quarter_name#17] +Arguments: [d_date_sk#16, d_quarter_name#17] + +(14) CometFilter +Input [2]: [d_date_sk#16, d_quarter_name#17] +Condition : ((isnotnull(d_quarter_name#17) AND (d_quarter_name#17 = 2001Q1)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [2]: [d_date_sk#16, d_quarter_name#17] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#18, d_quarter_name#19] +Arguments: [d_date_sk#18, d_quarter_name#19] + +(20) CometFilter +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : (d_quarter_name#19 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#18)) + +(21) CometProject +Input [2]: [d_date_sk#18, d_quarter_name#19] +Arguments: [d_date_sk#18], [d_date_sk#18] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: [d_date_sk#18] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#18] +Arguments: [sr_returned_date_sk#11], [d_date_sk#18], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#18] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] + +(25) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#20] + +(26) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#20] +Arguments: [cs_sold_date_sk#15], [d_date_sk#20], Inner, BuildRight + +(27) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#20] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] + +(28) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#21, s_state#22] +Arguments: [s_store_sk#21, s_state#22] + +(29) CometFilter +Input [2]: [s_store_sk#21, s_state#22] +Condition : isnotnull(s_store_sk#21) + +(30) CometBroadcastExchange +Input [2]: [s_store_sk#21, s_state#22] +Arguments: [s_store_sk#21, s_state#22] + +(31) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Right output [2]: [s_store_sk#21, s_state#22] +Arguments: [ss_store_sk#3], [s_store_sk#21], Inner, BuildRight + +(32) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#21, s_state#22] +Arguments: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22], [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] + +(33) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(34) CometFilter +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Condition : isnotnull(i_item_sk#23) + +(35) CometBroadcastExchange +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(36) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] +Right output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [ss_item_sk#1], [i_item_sk#23], Inner, BuildRight + +(37) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25], [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] + +(38) ColumnarToRow [codegen id : 1] +Input [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] + +(39) HashAggregate [codegen id : 1] +Input [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#10), partial_avg(sr_return_quantity#10), partial_stddev_samp(cast(sr_return_quantity#10 as double)), partial_count(cs_quantity#14), partial_avg(cs_quantity#14), partial_stddev_samp(cast(cs_quantity#14 as double))] +Aggregate Attributes [18]: [count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Results [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] + +(40) Exchange +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Arguments: hashpartitioning(i_item_id#24, i_item_desc#25, s_state#22, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(41) HashAggregate [codegen id : 2] +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#10), avg(sr_return_quantity#10), stddev_samp(cast(sr_return_quantity#10 as double)), count(cs_quantity#14), avg(cs_quantity#14), stddev_samp(cast(cs_quantity#14 as double))] +Aggregate Attributes [9]: [count(ss_quantity#5)#62, avg(ss_quantity#5)#63, stddev_samp(cast(ss_quantity#5 as double))#64, count(sr_return_quantity#10)#65, avg(sr_return_quantity#10)#66, stddev_samp(cast(sr_return_quantity#10 as double))#67, count(cs_quantity#14)#68, avg(cs_quantity#14)#69, stddev_samp(cast(cs_quantity#14 as double))#70] +Results [15]: [i_item_id#24, i_item_desc#25, s_state#22, count(ss_quantity#5)#62 AS store_sales_quantitycount#71, avg(ss_quantity#5)#63 AS store_sales_quantityave#72, stddev_samp(cast(ss_quantity#5 as double))#64 AS store_sales_quantitystdev#73, (stddev_samp(cast(ss_quantity#5 as double))#64 / avg(ss_quantity#5)#63) AS store_sales_quantitycov#74, count(sr_return_quantity#10)#65 AS as_store_returns_quantitycount#75, avg(sr_return_quantity#10)#66 AS as_store_returns_quantityave#76, stddev_samp(cast(sr_return_quantity#10 as double))#67 AS as_store_returns_quantitystdev#77, (stddev_samp(cast(sr_return_quantity#10 as double))#67 / avg(sr_return_quantity#10)#66) AS store_returns_quantitycov#78, count(cs_quantity#14)#68 AS catalog_sales_quantitycount#79, avg(cs_quantity#14)#69 AS catalog_sales_quantityave#80, (stddev_samp(cast(cs_quantity#14 as double))#70 / avg(cs_quantity#14)#69) AS catalog_sales_quantitystdev#81, (stddev_samp(cast(cs_quantity#14 as double))#70 / avg(cs_quantity#14)#69) AS catalog_sales_quantitycov#82] + +(42) TakeOrderedAndProject +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] +Arguments: 100, [i_item_id#24 ASC NULLS FIRST, i_item_desc#25 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST], [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b79443c9a6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q17.native_datafusion/simplified.txt @@ -0,0 +1,46 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] + WholeStageCodegen (2) + HashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] [count(ss_quantity),avg(ss_quantity),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(sr_return_quantity),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cs_quantity),stddev_samp(cast(cs_quantity as double)),store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + InputAdapter + Exchange [i_item_id,i_item_desc,s_state] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_quarter_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_quarter_name] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_quarter_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_quarter_name] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [s_store_sk,s_state] #6 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc] #7 + CometFilter [i_item_sk,i_item_id,i_item_desc] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/explain.txt new file mode 100644 index 0000000000..bc0faccc0a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/explain.txt @@ -0,0 +1,219 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * ColumnarToRow (37) + +- CometExpand (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (30) + : +- CometBroadcastHashJoin (29) + : :- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (19) + : : : +- CometBroadcastHashJoin (18) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : : : +- CometBroadcastExchange (17) + : : : +- CometFilter (16) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (15) + : : +- CometBroadcastExchange (22) + : : +- CometFilter (21) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (20) + : +- CometBroadcastExchange (28) + : +- CometProject (27) + : +- CometFilter (26) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (25) + +- CometBroadcastExchange (33) + +- CometFilter (32) + +- CometNativeScan: `spark_catalog`.`default`.`item` (31) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Arguments: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(2) CometFilter +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(4) CometFilter +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown )) AND isnotnull(cd_demo_sk#10)) + +(5) CometProject +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13], [cd_demo_sk#10, cd_dep_count#13] + +(6) CometBroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13] + +(7) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Right output [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#10], Inner, BuildRight + +(8) CometProject +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13], [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(10) CometFilter +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(11) CometProject +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(12) CometBroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(13) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Right output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#14], Inner, BuildRight + +(14) CometProject +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(15) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(16) CometFilter +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(17) CometBroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(18) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Right output [1]: [cd_demo_sk#19] +Arguments: [c_current_cdemo_sk#15], [cd_demo_sk#19], Inner, BuildRight + +(19) CometProject +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(21) CometFilter +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#20)) + +(22) CometBroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(23) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Right output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [c_current_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(24) CometProject +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(25) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24, d_year#25] + +(26) CometFilter +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1998)) AND isnotnull(d_date_sk#24)) + +(27) CometProject +Input [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24], [d_date_sk#24] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: [d_date_sk#24] + +(29) CometBroadcastHashJoin +Left output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [1]: [d_date_sk#24] +Arguments: [cs_sold_date_sk#9], [d_date_sk#24], Inner, BuildRight + +(30) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(31) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(32) CometFilter +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(33) CometBroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(34) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [cs_item_sk#3], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] +Arguments: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] + +(36) CometExpand +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] + +(37) ColumnarToRow [codegen id : 1] +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] + +(38) HashAggregate [codegen id : 1] +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Results [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] + +(39) Exchange +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(40) HashAggregate [codegen id : 2] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#61, avg(cast(cs_list_price#5 as decimal(12,2)))#62, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63, avg(cast(cs_sales_price#6 as decimal(12,2)))#64, avg(cast(cs_net_profit#8 as decimal(12,2)))#65, avg(cast(c_birth_year#18 as decimal(12,2)))#66, avg(cast(cd_dep_count#13 as decimal(12,2)))#67] +Results [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, avg(cast(cs_quantity#4 as decimal(12,2)))#61 AS agg1#68, avg(cast(cs_list_price#5 as decimal(12,2)))#62 AS agg2#69, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63 AS agg3#70, avg(cast(cs_sales_price#6 as decimal(12,2)))#64 AS agg4#71, avg(cast(cs_net_profit#8 as decimal(12,2)))#65 AS agg5#72, avg(cast(c_birth_year#18 as decimal(12,2)))#66 AS agg6#73, avg(cast(cd_dep_count#13 as decimal(12,2)))#67 AS agg7#74] + +(41) TakeOrderedAndProject +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] +Arguments: 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/simplified.txt new file mode 100644 index 0000000000..87e5a9e480 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q18.native_datafusion/simplified.txt @@ -0,0 +1,45 @@ +TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + WholeStageCodegen (2) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometExpand [i_item_id,ca_country,ca_state,ca_county] [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] + CometProject [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_county,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk,cd_dep_count] #2 + CometProject [cd_demo_sk,cd_dep_count] + CometFilter [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + CometProject [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometBroadcastExchange [cd_demo_sk] #4 + CometFilter [cd_demo_sk] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state,ca_country] #5 + CometFilter [ca_address_sk,ca_county,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #7 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/explain.txt new file mode 100644 index 0000000000..0f1446bb56 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/explain.txt @@ -0,0 +1,172 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * ColumnarToRow (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : : : +- CometBroadcastExchange (12) + : : : +- CometProject (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (15) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (20) + +- ReusedExchange (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1], [d_date_sk#1] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(5) CometFilter +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnotnull(ss_store_sk#6)) + +(6) CometBroadcastExchange +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(7) CometBroadcastHashJoin +Left output [1]: [d_date_sk#1] +Right output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [d_date_sk#1], [ss_sold_date_sk#8], Inner, BuildRight + +(8) CometProject +Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7], [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] + +(10) CometFilter +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 8)) AND isnotnull(i_item_sk#9)) + +(11) CometProject +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(12) CometBroadcastExchange +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Right output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [ss_item_sk#4], [i_item_sk#9], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(15) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [c_customer_sk#15, c_current_addr_sk#16] + +(16) CometFilter +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) + +(17) CometBroadcastExchange +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [c_customer_sk#15, c_current_addr_sk#16] + +(18) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Right output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: [ss_customer_sk#5], [c_customer_sk#15], Inner, BuildRight + +(19) CometProject +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] +Arguments: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16], [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17, ca_zip#18] + +(21) CometFilter +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) + +(22) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17, ca_zip#18] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Right output [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [c_current_addr_sk#16], [ca_address_sk#17], Inner, BuildRight + +(24) CometProject +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] +Arguments: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18], [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] + +(25) ReusedExchange [Reuses operator id: 22] +Output [2]: [s_store_sk#19, s_zip#20] + +(26) CometBroadcastHashJoin +Left output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Right output [2]: [s_store_sk#19, s_zip#20] +Arguments: [ss_store_sk#6], [s_store_sk#19], Inner, NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)), BuildRight + +(27) CometProject +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] +Arguments: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13], [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(28) ColumnarToRow [codegen id : 1] +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(29) HashAggregate [codegen id : 1] +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#21] +Results [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] + +(30) Exchange +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(31) HashAggregate [codegen id : 2] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#23] +Results [5]: [i_brand_id#10 AS brand_id#24, i_brand#11 AS brand#25, i_manufact_id#12, i_manufact#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#23,17,2) AS ext_price#26] + +(32) TakeOrderedAndProject +Input [5]: [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] +Arguments: 100, [ext_price#26 DESC NULLS LAST, brand#25 ASC NULLS FIRST, brand_id#24 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2be817efe9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q19.native_datafusion/simplified.txt @@ -0,0 +1,36 @@ +TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] + WholeStageCodegen (2) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + WholeStageCodegen (1) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip,s_store_sk,s_zip] + CometProject [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk,ca_address_sk,ca_zip] + CometProject [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_customer_sk,c_current_addr_sk] + CometProject [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] #3 + CometProject [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #4 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_zip] #5 + CometFilter [ca_address_sk,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_zip] + ReusedExchange [s_store_sk,s_zip] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/explain.txt new file mode 100644 index 0000000000..4239f9f403 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/explain.txt @@ -0,0 +1,194 @@ +== Physical Plan == +* ColumnarToRow (36) ++- CometSort (35) + +- CometColumnarExchange (34) + +- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * HashAggregate (14) + : : +- Exchange (13) + : : +- * HashAggregate (12) + : : +- * ColumnarToRow (11) + : : +- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometUnion (5) + : : : :- CometProject (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometProject (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (3) + : : +- CometBroadcastExchange (8) + : : +- CometFilter (7) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (6) + : +- BroadcastExchange (19) + : +- * ColumnarToRow (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (15) + +- BroadcastExchange (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * HashAggregate (23) + : +- ReusedExchange (22) + +- BroadcastExchange (28) + +- * ColumnarToRow (27) + +- CometProject (26) + +- CometFilter (25) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (24) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Arguments: [ws_ext_sales_price#1, ws_sold_date_sk#2] + +(2) CometProject +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Arguments: [sold_date_sk#3, sales_price#4], [ws_sold_date_sk#2 AS sold_date_sk#3, ws_ext_sales_price#1 AS sales_price#4] + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Arguments: [cs_ext_sales_price#5, cs_sold_date_sk#6] + +(4) CometProject +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Arguments: [sold_date_sk#7, sales_price#8], [cs_sold_date_sk#6 AS sold_date_sk#7, cs_ext_sales_price#5 AS sales_price#8] + +(5) CometUnion +Child 0 Input [2]: [sold_date_sk#3, sales_price#4] +Child 1 Input [2]: [sold_date_sk#7, sales_price#8] + +(6) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(7) CometFilter +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(8) CometBroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(9) CometBroadcastHashJoin +Left output [2]: [sold_date_sk#3, sales_price#4] +Right output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [sold_date_sk#3], [d_date_sk#9], Inner, BuildRight + +(10) CometProject +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: [sales_price#4, d_week_seq#10, d_day_name#11], [sales_price#4, d_week_seq#10, d_day_name#11] + +(11) ColumnarToRow [codegen id : 1] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] + +(12) HashAggregate [codegen id : 1] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] + +(13) Exchange +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(14) HashAggregate [codegen id : 6] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] + +(15) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_week_seq#40, d_year#41] +Arguments: [d_week_seq#40, d_year#41] + +(16) CometFilter +Input [2]: [d_week_seq#40, d_year#41] +Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2001)) AND isnotnull(d_week_seq#40)) + +(17) CometProject +Input [2]: [d_week_seq#40, d_year#41] +Arguments: [d_week_seq#40], [d_week_seq#40] + +(18) ColumnarToRow [codegen id : 2] +Input [1]: [d_week_seq#40] + +(19) BroadcastExchange +Input [1]: [d_week_seq#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(20) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#40] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 6] +Output [8]: [d_week_seq#10 AS d_week_seq1#42, sun_sales#33 AS sun_sales1#43, mon_sales#34 AS mon_sales1#44, tue_sales#35 AS tue_sales1#45, wed_sales#36 AS wed_sales1#46, thu_sales#37 AS thu_sales1#47, fri_sales#38 AS fri_sales1#48, sat_sales#39 AS sat_sales1#49] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#40] + +(22) ReusedExchange [Reuses operator id: 13] +Output [8]: [d_week_seq#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56, sum#57] + +(23) HashAggregate [codegen id : 5] +Input [8]: [d_week_seq#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56, sum#57] +Keys [1]: [d_week_seq#50] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#58 = Sunday ) THEN sales_price#59 END)), sum(UnscaledValue(CASE WHEN (d_day_name#58 = Monday ) THEN sales_price#59 END)), sum(UnscaledValue(CASE WHEN (d_day_name#58 = Tuesday ) THEN sales_price#59 END)), sum(UnscaledValue(CASE WHEN (d_day_name#58 = Wednesday) THEN sales_price#59 END)), sum(UnscaledValue(CASE WHEN (d_day_name#58 = Thursday ) THEN sales_price#59 END)), sum(UnscaledValue(CASE WHEN (d_day_name#58 = Friday ) THEN sales_price#59 END)), sum(UnscaledValue(CASE WHEN (d_day_name#58 = Saturday ) THEN sales_price#59 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#58 = Sunday ) THEN sales_price#59 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#58 = Monday ) THEN sales_price#59 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#58 = Tuesday ) THEN sales_price#59 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#58 = Wednesday) THEN sales_price#59 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#58 = Thursday ) THEN sales_price#59 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#58 = Friday ) THEN sales_price#59 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#58 = Saturday ) THEN sales_price#59 END))#32] +Results [8]: [d_week_seq#50, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Sunday ) THEN sales_price#59 END))#26,17,2) AS sun_sales#60, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Monday ) THEN sales_price#59 END))#27,17,2) AS mon_sales#61, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Tuesday ) THEN sales_price#59 END))#28,17,2) AS tue_sales#62, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Wednesday) THEN sales_price#59 END))#29,17,2) AS wed_sales#63, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Thursday ) THEN sales_price#59 END))#30,17,2) AS thu_sales#64, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Friday ) THEN sales_price#59 END))#31,17,2) AS fri_sales#65, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#58 = Saturday ) THEN sales_price#59 END))#32,17,2) AS sat_sales#66] + +(24) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_week_seq#67, d_year#68] +Arguments: [d_week_seq#67, d_year#68] + +(25) CometFilter +Input [2]: [d_week_seq#67, d_year#68] +Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) + +(26) CometProject +Input [2]: [d_week_seq#67, d_year#68] +Arguments: [d_week_seq#67], [d_week_seq#67] + +(27) ColumnarToRow [codegen id : 4] +Input [1]: [d_week_seq#67] + +(28) BroadcastExchange +Input [1]: [d_week_seq#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [d_week_seq#50] +Right keys [1]: [d_week_seq#67] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [8]: [d_week_seq#50 AS d_week_seq2#69, sun_sales#60 AS sun_sales2#70, mon_sales#61 AS mon_sales2#71, tue_sales#62 AS tue_sales2#72, wed_sales#63 AS wed_sales2#73, thu_sales#64 AS thu_sales2#74, fri_sales#65 AS fri_sales2#75, sat_sales#66 AS sat_sales2#76] +Input [9]: [d_week_seq#50, sun_sales#60, mon_sales#61, tue_sales#62, wed_sales#63, thu_sales#64, fri_sales#65, sat_sales#66, d_week_seq#67] + +(31) BroadcastExchange +Input [8]: [d_week_seq2#69, sun_sales2#70, mon_sales2#71, tue_sales2#72, wed_sales2#73, thu_sales2#74, fri_sales2#75, sat_sales2#76] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_week_seq1#42] +Right keys [1]: [(d_week_seq2#69 - 53)] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [8]: [d_week_seq1#42, round((sun_sales1#43 / sun_sales2#70), 2) AS round((sun_sales1 / sun_sales2), 2)#77, round((mon_sales1#44 / mon_sales2#71), 2) AS round((mon_sales1 / mon_sales2), 2)#78, round((tue_sales1#45 / tue_sales2#72), 2) AS round((tue_sales1 / tue_sales2), 2)#79, round((wed_sales1#46 / wed_sales2#73), 2) AS round((wed_sales1 / wed_sales2), 2)#80, round((thu_sales1#47 / thu_sales2#74), 2) AS round((thu_sales1 / thu_sales2), 2)#81, round((fri_sales1#48 / fri_sales2#75), 2) AS round((fri_sales1 / fri_sales2), 2)#82, round((sat_sales1#49 / sat_sales2#76), 2) AS round((sat_sales1 / sat_sales2), 2)#83] +Input [16]: [d_week_seq1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#69, sun_sales2#70, mon_sales2#71, tue_sales2#72, wed_sales2#73, thu_sales2#74, fri_sales2#75, sat_sales2#76] + +(34) CometColumnarExchange +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#77, round((mon_sales1 / mon_sales2), 2)#78, round((tue_sales1 / tue_sales2), 2)#79, round((wed_sales1 / wed_sales2), 2)#80, round((thu_sales1 / thu_sales2), 2)#81, round((fri_sales1 / fri_sales2), 2)#82, round((sat_sales1 / sat_sales2), 2)#83] +Arguments: rangepartitioning(d_week_seq1#42 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=5] + +(35) CometSort +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#77, round((mon_sales1 / mon_sales2), 2)#78, round((tue_sales1 / tue_sales2), 2)#79, round((wed_sales1 / wed_sales2), 2)#80, round((thu_sales1 / thu_sales2), 2)#81, round((fri_sales1 / fri_sales2), 2)#82, round((sat_sales1 / sat_sales2), 2)#83] +Arguments: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#77, round((mon_sales1 / mon_sales2), 2)#78, round((tue_sales1 / tue_sales2), 2)#79, round((wed_sales1 / wed_sales2), 2)#80, round((thu_sales1 / thu_sales2), 2)#81, round((fri_sales1 / fri_sales2), 2)#82, round((sat_sales1 / sat_sales2), 2)#83], [d_week_seq1#42 ASC NULLS FIRST] + +(36) ColumnarToRow [codegen id : 7] +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#77, round((mon_sales1 / mon_sales2), 2)#78, round((tue_sales1 / tue_sales2), 2)#79, round((wed_sales1 / wed_sales2), 2)#80, round((thu_sales1 / thu_sales2), 2)#81, round((fri_sales1 / fri_sales2), 2)#82, round((sat_sales1 / sat_sales2), 2)#83] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9137b04fe5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q2.native_datafusion/simplified.txt @@ -0,0 +1,51 @@ +WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometSort [d_week_seq1,round((sun_sales1 / sun_sales2), 2),round((mon_sales1 / mon_sales2), 2),round((tue_sales1 / tue_sales2), 2),round((wed_sales1 / wed_sales2), 2),round((thu_sales1 / thu_sales2), 2),round((fri_sales1 / fri_sales2), 2),round((sat_sales1 / sat_sales2), 2)] + CometColumnarExchange [d_week_seq1] #1 + WholeStageCodegen (6) + Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen (1) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [sales_price,d_week_seq,d_day_name] + CometBroadcastHashJoin [sold_date_sk,sales_price,d_date_sk,d_week_seq,d_day_name] + CometUnion [sold_date_sk,sales_price] + CometProject [ws_sold_date_sk,ws_ext_sales_price] [sold_date_sk,sales_price] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ext_sales_price,ws_sold_date_sk] + CometProject [cs_sold_date_sk,cs_ext_sales_price] [sold_date_sk,sales_price] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_week_seq,d_day_name] #3 + CometFilter [d_date_sk,d_week_seq,d_day_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/explain.txt new file mode 100644 index 0000000000..81b0ce5c0b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/explain.txt @@ -0,0 +1,120 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * Sort (19) + +- Exchange (18) + +- * HashAggregate (17) + +- Exchange (16) + +- * HashAggregate (15) + +- * ColumnarToRow (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Arguments: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [cs_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) ColumnarToRow [codegen id : 1] +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(15) HashAggregate [codegen id : 1] +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(16) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(17) HashAggregate [codegen id : 2] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(18) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(19) Sort [codegen id : 3] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(21) Project [codegen id : 4] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(22) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2bc9570d10 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q20.native_datafusion/simplified.txt @@ -0,0 +1,30 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (4) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (3) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (2) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (1) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/explain.txt new file mode 100644 index 0000000000..180d4fd802 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/explain.txt @@ -0,0 +1,131 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Filter (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * ColumnarToRow (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : +- CometBroadcastExchange (11) + : +- CometProject (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (8) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_warehouse_sk#2) AND isnotnull(inv_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [w_warehouse_sk#5, w_warehouse_name#6] + +(4) CometFilter +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#5], Inner, BuildRight + +(7) CometProject +Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#5, w_warehouse_name#6] +Arguments: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6], [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] + +(8) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Arguments: [i_item_sk#7, i_item_id#8, i_current_price#9] + +(9) CometFilter +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Condition : (((isnotnull(i_current_price#9) AND (i_current_price#9 >= 0.99)) AND (i_current_price#9 <= 1.49)) AND isnotnull(i_item_sk#7)) + +(10) CometProject +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Arguments: [i_item_sk#7, i_item_id#8], [i_item_sk#7, i_item_id#8] + +(11) CometBroadcastExchange +Input [2]: [i_item_sk#7, i_item_id#8] +Arguments: [i_item_sk#7, i_item_id#8] + +(12) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] +Right output [2]: [i_item_sk#7, i_item_id#8] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_sk#7, i_item_id#8] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8], [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(15) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 2000-02-10)) AND (d_date#11 <= 2000-04-10)) AND isnotnull(d_date_sk#10)) + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(17) CometBroadcastHashJoin +Left output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] +Right output [2]: [d_date_sk#10, d_date#11] +Arguments: [inv_date_sk#4], [d_date_sk#10], Inner, BuildRight + +(18) CometProject +Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8, d_date_sk#10, d_date#11] +Arguments: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11], [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] + +(19) ColumnarToRow [codegen id : 1] +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] + +(20) HashAggregate [codegen id : 1] +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [partial_sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum#12, sum#13] +Results [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] + +(21) Exchange +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(22) HashAggregate [codegen id : 2] +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#16, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17] +Results [4]: [w_warehouse_name#6, i_item_id#8, sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#16 AS inv_before#18, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17 AS inv_after#19] + +(23) Filter [codegen id : 2] +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] +Condition : (CASE WHEN (inv_before#18 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(inv_after#19 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(inv_before#18 as double)))))) >= 0.666667) END AND CASE WHEN (inv_before#18 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(inv_after#19 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(inv_before#18 as double)))))) <= 1.5) END) + +(24) TakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] +Arguments: 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4b5b8afc29 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q21.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + WholeStageCodegen (2) + Filter [inv_before,inv_after] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date < 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),inv_before,inv_after,sum,sum] + InputAdapter + Exchange [w_warehouse_name,i_item_id] #1 + WholeStageCodegen (1) + HashAggregate [w_warehouse_name,i_item_id,d_date,inv_quantity_on_hand] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id,d_date_sk,d_date] + CometProject [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_sk,i_item_id] + CometProject [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [i_item_sk,i_item_id] #3 + CometProject [i_item_sk,i_item_id] + CometFilter [i_item_sk,i_item_id,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_current_price] + CometBroadcastExchange [d_date_sk,d_date] #4 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/explain.txt new file mode 100644 index 0000000000..c097db94ab --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/explain.txt @@ -0,0 +1,131 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * ColumnarToRow (20) + +- CometExpand (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5, d_month_seq#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [inv_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3], [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(10) CometFilter +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(12) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Right output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(15) CometFilter +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(16) CometBroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(17) CometBroadcastHashJoin +Left output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Right output [1]: [w_warehouse_sk#12] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#12], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] +Arguments: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] + +(19) CometExpand +Input [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] + +(20) ColumnarToRow [codegen id : 1] +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] + +(21) HashAggregate [codegen id : 1] +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [sum#18, count#19] +Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] + +(22) Exchange +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(23) HashAggregate [codegen id : 2] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#22] +Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#3)#22 AS qoh#23] + +(24) TakeOrderedAndProject +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] +Arguments: 100, [qoh#23 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/simplified.txt new file mode 100644 index 0000000000..60fd5802d3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q22.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (2) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (1) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + ColumnarToRow + InputAdapter + CometExpand [i_product_name,i_brand,i_class,i_category] [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category,spark_grouping_id] + CometProject [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name,w_warehouse_sk] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometBroadcastExchange [w_warehouse_sk] #4 + CometFilter [w_warehouse_sk] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/explain.txt new file mode 100644 index 0000000000..67f7d13147 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/explain.txt @@ -0,0 +1,484 @@ +== Physical Plan == +* HashAggregate (70) ++- Exchange (69) + +- * HashAggregate (68) + +- Union (67) + :- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (42) + : : +- * SortMergeJoin LeftSemi (41) + : : :- * ColumnarToRow (25) + : : : +- CometSort (24) + : : : +- CometExchange (23) + : : : +- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometBroadcastExchange (20) + : : : +- CometProject (19) + : : : +- CometFilter (18) + : : : +- CometHashAggregate (17) + : : : +- CometExchange (16) + : : : +- CometHashAggregate (15) + : : : +- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometFilter (3) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (2) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- CometBroadcastExchange (12) + : : : +- CometFilter (11) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (10) + : : +- * Sort (40) + : : +- * Project (39) + : : +- * Filter (38) + : : +- * HashAggregate (37) + : : +- Exchange (36) + : : +- * HashAggregate (35) + : : +- * ColumnarToRow (34) + : : +- CometProject (33) + : : +- CometBroadcastHashJoin (32) + : : :- CometProject (28) + : : : +- CometFilter (27) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (26) + : : +- CometBroadcastExchange (31) + : : +- CometFilter (30) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (29) + : +- BroadcastExchange (47) + : +- * ColumnarToRow (46) + : +- CometProject (45) + : +- CometFilter (44) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (43) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (63) + : +- * SortMergeJoin LeftSemi (62) + : :- * ColumnarToRow (56) + : : +- CometSort (55) + : : +- CometExchange (54) + : : +- CometProject (53) + : : +- CometBroadcastHashJoin (52) + : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (50) + : : +- ReusedExchange (51) + : +- * Sort (61) + : +- * Project (60) + : +- * Filter (59) + : +- * HashAggregate (58) + : +- ReusedExchange (57) + +- ReusedExchange (64) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(2) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#6, ss_sold_date_sk#7] + +(3) CometFilter +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9, d_year#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9], [d_date_sk#8, d_date#9] + +(7) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: [d_date_sk#8, d_date#9] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_date#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] +Arguments: [ss_item_sk#6, d_date#9], [ss_item_sk#6, d_date#9] + +(10) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(11) CometFilter +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(12) CometBroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(13) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, d_date#9] +Right output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [ss_item_sk#6], [i_item_sk#11], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] +Arguments: [d_date#9, i_item_sk#11, _groupingexpression#13], [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] + +(15) CometHashAggregate +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] + +(16) CometExchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(17) CometHashAggregate +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] + +(18) CometFilter +Input [2]: [item_sk#15, cnt#16] +Condition : (cnt#16 > 4) + +(19) CometProject +Input [2]: [item_sk#15, cnt#16] +Arguments: [item_sk#15], [item_sk#15] + +(20) CometBroadcastExchange +Input [1]: [item_sk#15] +Arguments: [item_sk#15] + +(21) CometBroadcastHashJoin +Left output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [item_sk#15] +Arguments: [cs_item_sk#2], [item_sk#15], LeftSemi, BuildRight + +(22) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(23) CometExchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(24) CometSort +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1 ASC NULLS FIRST] + +(25) ColumnarToRow [codegen id : 1] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] + +(27) CometFilter +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Condition : isnotnull(ss_customer_sk#17) + +(28) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19], [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] + +(29) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(30) CometFilter +Input [1]: [c_customer_sk#21] +Condition : isnotnull(c_customer_sk#21) + +(31) CometBroadcastExchange +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(32) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] +Right output [1]: [c_customer_sk#21] +Arguments: [ss_customer_sk#17], [c_customer_sk#21], Inner, BuildRight + +(33) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Arguments: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21], [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(34) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(35) HashAggregate [codegen id : 2] +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] +Aggregate Attributes [2]: [sum#22, isEmpty#23] +Results [3]: [c_customer_sk#21, sum#24, isEmpty#25] + +(36) Exchange +Input [3]: [c_customer_sk#21, sum#24, isEmpty#25] +Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(37) HashAggregate [codegen id : 3] +Input [3]: [c_customer_sk#21, sum#24, isEmpty#25] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))#26] +Results [2]: [c_customer_sk#21, sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))#26 AS ssales#27] + +(38) Filter [codegen id : 3] +Input [2]: [c_customer_sk#21, ssales#27] +Condition : (isnotnull(ssales#27) AND (cast(ssales#27 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#28, [id=#29]))) + +(39) Project [codegen id : 3] +Output [1]: [c_customer_sk#21] +Input [2]: [c_customer_sk#21, ssales#27] + +(40) Sort [codegen id : 3] +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin [codegen id : 5] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#21] +Join type: LeftSemi +Join condition: None + +(42) Project [codegen id : 5] +Output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(43) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#30, d_year#31, d_moy#32] +Arguments: [d_date_sk#30, d_year#31, d_moy#32] + +(44) CometFilter +Input [3]: [d_date_sk#30, d_year#31, d_moy#32] +Condition : ((((isnotnull(d_year#31) AND isnotnull(d_moy#32)) AND (d_year#31 = 2000)) AND (d_moy#32 = 2)) AND isnotnull(d_date_sk#30)) + +(45) CometProject +Input [3]: [d_date_sk#30, d_year#31, d_moy#32] +Arguments: [d_date_sk#30], [d_date_sk#30] + +(46) ColumnarToRow [codegen id : 4] +Input [1]: [d_date_sk#30] + +(47) BroadcastExchange +Input [1]: [d_date_sk#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(48) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#30] +Join type: Inner +Join condition: None + +(49) Project [codegen id : 5] +Output [1]: [(cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4) AS sales#33] +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#30] + +(50) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [5]: [ws_item_sk#34, ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] +Arguments: [ws_item_sk#34, ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] + +(51) ReusedExchange [Reuses operator id: 20] +Output [1]: [item_sk#39] + +(52) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#34, ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] +Right output [1]: [item_sk#39] +Arguments: [ws_item_sk#34], [item_sk#39], LeftSemi, BuildRight + +(53) CometProject +Input [5]: [ws_item_sk#34, ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] +Arguments: [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38], [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] + +(54) CometExchange +Input [4]: [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] +Arguments: hashpartitioning(ws_bill_customer_sk#35, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(55) CometSort +Input [4]: [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] +Arguments: [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38], [ws_bill_customer_sk#35 ASC NULLS FIRST] + +(56) ColumnarToRow [codegen id : 6] +Input [4]: [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] + +(57) ReusedExchange [Reuses operator id: 36] +Output [3]: [c_customer_sk#40, sum#41, isEmpty#42] + +(58) HashAggregate [codegen id : 8] +Input [3]: [c_customer_sk#40, sum#41, isEmpty#42] +Keys [1]: [c_customer_sk#40] +Functions [1]: [sum((cast(ss_quantity#43 as decimal(10,0)) * ss_sales_price#44))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#43 as decimal(10,0)) * ss_sales_price#44))#26] +Results [2]: [c_customer_sk#40, sum((cast(ss_quantity#43 as decimal(10,0)) * ss_sales_price#44))#26 AS ssales#45] + +(59) Filter [codegen id : 8] +Input [2]: [c_customer_sk#40, ssales#45] +Condition : (isnotnull(ssales#45) AND (cast(ssales#45 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#28, [id=#29]))) + +(60) Project [codegen id : 8] +Output [1]: [c_customer_sk#40] +Input [2]: [c_customer_sk#40, ssales#45] + +(61) Sort [codegen id : 8] +Input [1]: [c_customer_sk#40] +Arguments: [c_customer_sk#40 ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin [codegen id : 10] +Left keys [1]: [ws_bill_customer_sk#35] +Right keys [1]: [c_customer_sk#40] +Join type: LeftSemi +Join condition: None + +(63) Project [codegen id : 10] +Output [3]: [ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] +Input [4]: [ws_bill_customer_sk#35, ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38] + +(64) ReusedExchange [Reuses operator id: 47] +Output [1]: [d_date_sk#46] + +(65) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#38] +Right keys [1]: [d_date_sk#46] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 10] +Output [1]: [(cast(ws_quantity#36 as decimal(10,0)) * ws_list_price#37) AS sales#47] +Input [4]: [ws_quantity#36, ws_list_price#37, ws_sold_date_sk#38, d_date_sk#46] + +(67) Union + +(68) HashAggregate [codegen id : 11] +Input [1]: [sales#33] +Keys: [] +Functions [1]: [partial_sum(sales#33)] +Aggregate Attributes [2]: [sum#48, isEmpty#49] +Results [2]: [sum#50, isEmpty#51] + +(69) Exchange +Input [2]: [sum#50, isEmpty#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(70) HashAggregate [codegen id : 12] +Input [2]: [sum#50, isEmpty#51] +Keys: [] +Functions [1]: [sum(sales#33)] +Aggregate Attributes [1]: [sum(sales#33)#52] +Results [1]: [sum(sales#33)#52 AS sum(sales)#53] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 38 Hosting Expression = Subquery scalar-subquery#28, [id=#29] +* HashAggregate (88) ++- Exchange (87) + +- * HashAggregate (86) + +- * HashAggregate (85) + +- Exchange (84) + +- * HashAggregate (83) + +- * ColumnarToRow (82) + +- CometProject (81) + +- CometBroadcastHashJoin (80) + :- CometProject (75) + : +- CometBroadcastHashJoin (74) + : :- CometFilter (72) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (71) + : +- ReusedExchange (73) + +- CometBroadcastExchange (79) + +- CometProject (78) + +- CometFilter (77) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (76) + + +(71) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#54, ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57] +Arguments: [ss_customer_sk#54, ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57] + +(72) CometFilter +Input [4]: [ss_customer_sk#54, ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57] +Condition : isnotnull(ss_customer_sk#54) + +(73) ReusedExchange [Reuses operator id: 31] +Output [1]: [c_customer_sk#58] + +(74) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#54, ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57] +Right output [1]: [c_customer_sk#58] +Arguments: [ss_customer_sk#54], [c_customer_sk#58], Inner, BuildRight + +(75) CometProject +Input [5]: [ss_customer_sk#54, ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57, c_customer_sk#58] +Arguments: [ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57, c_customer_sk#58], [ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57, c_customer_sk#58] + +(76) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#59, d_year#60] +Arguments: [d_date_sk#59, d_year#60] + +(77) CometFilter +Input [2]: [d_date_sk#59, d_year#60] +Condition : (d_year#60 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#59)) + +(78) CometProject +Input [2]: [d_date_sk#59, d_year#60] +Arguments: [d_date_sk#59], [d_date_sk#59] + +(79) CometBroadcastExchange +Input [1]: [d_date_sk#59] +Arguments: [d_date_sk#59] + +(80) CometBroadcastHashJoin +Left output [4]: [ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57, c_customer_sk#58] +Right output [1]: [d_date_sk#59] +Arguments: [ss_sold_date_sk#57], [d_date_sk#59], Inner, BuildRight + +(81) CometProject +Input [5]: [ss_quantity#55, ss_sales_price#56, ss_sold_date_sk#57, c_customer_sk#58, d_date_sk#59] +Arguments: [ss_quantity#55, ss_sales_price#56, c_customer_sk#58], [ss_quantity#55, ss_sales_price#56, c_customer_sk#58] + +(82) ColumnarToRow [codegen id : 1] +Input [3]: [ss_quantity#55, ss_sales_price#56, c_customer_sk#58] + +(83) HashAggregate [codegen id : 1] +Input [3]: [ss_quantity#55, ss_sales_price#56, c_customer_sk#58] +Keys [1]: [c_customer_sk#58] +Functions [1]: [partial_sum((cast(ss_quantity#55 as decimal(10,0)) * ss_sales_price#56))] +Aggregate Attributes [2]: [sum#61, isEmpty#62] +Results [3]: [c_customer_sk#58, sum#63, isEmpty#64] + +(84) Exchange +Input [3]: [c_customer_sk#58, sum#63, isEmpty#64] +Arguments: hashpartitioning(c_customer_sk#58, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(85) HashAggregate [codegen id : 2] +Input [3]: [c_customer_sk#58, sum#63, isEmpty#64] +Keys [1]: [c_customer_sk#58] +Functions [1]: [sum((cast(ss_quantity#55 as decimal(10,0)) * ss_sales_price#56))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#55 as decimal(10,0)) * ss_sales_price#56))#65] +Results [1]: [sum((cast(ss_quantity#55 as decimal(10,0)) * ss_sales_price#56))#65 AS csales#66] + +(86) HashAggregate [codegen id : 2] +Input [1]: [csales#66] +Keys: [] +Functions [1]: [partial_max(csales#66)] +Aggregate Attributes [1]: [max#67] +Results [1]: [max#68] + +(87) Exchange +Input [1]: [max#68] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(88) HashAggregate [codegen id : 3] +Input [1]: [max#68] +Keys: [] +Functions [1]: [max(csales#66)] +Aggregate Attributes [1]: [max(csales#66)#69] +Results [1]: [max(csales#66)#69 AS tpcds_cmax#70] + +Subquery:2 Hosting operator id = 59 Hosting Expression = ReusedSubquery Subquery scalar-subquery#28, [id=#29] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..aa333d7ff5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23a.native_datafusion/simplified.txt @@ -0,0 +1,120 @@ +WholeStageCodegen (12) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (11) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (5) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometExchange [cs_bill_customer_sk] #2 + CometProject [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,item_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [item_sk] #3 + CometProject [item_sk] + CometFilter [item_sk,cnt] + CometHashAggregate [item_sk,cnt,_groupingexpression,i_item_sk,d_date,count,count(1)] + CometExchange [_groupingexpression,i_item_sk,d_date] #4 + CometHashAggregate [_groupingexpression,i_item_sk,d_date,count] + CometProject [i_item_desc] [d_date,i_item_sk,_groupingexpression] + CometBroadcastHashJoin [ss_item_sk,d_date,i_item_sk,i_item_desc] + CometProject [ss_item_sk,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_year] + CometBroadcastExchange [i_item_sk,i_item_desc] #6 + CometFilter [i_item_sk,i_item_desc] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_desc] + InputAdapter + WholeStageCodegen (3) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + Subquery #1 + WholeStageCodegen (3) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (2) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (1) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk,d_date_sk] + CometProject [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + ReusedExchange [c_customer_sk] #8 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (2) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,c_customer_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [c_customer_sk] #8 + CometFilter [c_customer_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + WholeStageCodegen (10) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometSort [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometExchange [ws_bill_customer_sk] #13 + CometProject [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,item_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [item_sk] #3 + InputAdapter + WholeStageCodegen (8) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/explain.txt new file mode 100644 index 0000000000..f02a51d2f8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/explain.txt @@ -0,0 +1,603 @@ +== Physical Plan == +TakeOrderedAndProject (91) ++- Union (90) + :- * HashAggregate (66) + : +- Exchange (65) + : +- * HashAggregate (64) + : +- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Project (56) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * SortMergeJoin LeftSemi (42) + : : : :- * ColumnarToRow (26) + : : : : +- CometSort (25) + : : : : +- CometExchange (24) + : : : : +- CometProject (23) + : : : : +- CometBroadcastHashJoin (22) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometBroadcastExchange (21) + : : : : +- CometProject (20) + : : : : +- CometFilter (19) + : : : : +- CometHashAggregate (18) + : : : : +- CometExchange (17) + : : : : +- CometHashAggregate (16) + : : : : +- CometProject (15) + : : : : +- CometBroadcastHashJoin (14) + : : : : :- CometProject (10) + : : : : : +- CometBroadcastHashJoin (9) + : : : : : :- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : : +- CometBroadcastExchange (8) + : : : : : +- CometProject (7) + : : : : : +- CometFilter (6) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (5) + : : : : +- CometBroadcastExchange (13) + : : : : +- CometFilter (12) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : +- * Sort (41) + : : : +- * Project (40) + : : : +- * Filter (39) + : : : +- * HashAggregate (38) + : : : +- Exchange (37) + : : : +- * HashAggregate (36) + : : : +- * ColumnarToRow (35) + : : : +- CometProject (34) + : : : +- CometBroadcastHashJoin (33) + : : : :- CometProject (29) + : : : : +- CometFilter (28) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (27) + : : : +- CometBroadcastExchange (32) + : : : +- CometFilter (31) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (30) + : : +- BroadcastExchange (54) + : : +- * SortMergeJoin LeftSemi (53) + : : :- * ColumnarToRow (47) + : : : +- CometSort (46) + : : : +- CometExchange (45) + : : : +- CometFilter (44) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (43) + : : +- * Sort (52) + : : +- * Project (51) + : : +- * Filter (50) + : : +- * HashAggregate (49) + : : +- ReusedExchange (48) + : +- BroadcastExchange (61) + : +- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometFilter (58) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (57) + +- * HashAggregate (89) + +- Exchange (88) + +- * HashAggregate (87) + +- * Project (86) + +- * BroadcastHashJoin Inner BuildRight (85) + :- * Project (83) + : +- * BroadcastHashJoin Inner BuildRight (82) + : :- * SortMergeJoin LeftSemi (80) + : : :- * ColumnarToRow (74) + : : : +- CometSort (73) + : : : +- CometExchange (72) + : : : +- CometProject (71) + : : : +- CometBroadcastHashJoin (70) + : : : :- CometFilter (68) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (67) + : : : +- ReusedExchange (69) + : : +- * Sort (79) + : : +- * Project (78) + : : +- * Filter (77) + : : +- * HashAggregate (76) + : : +- ReusedExchange (75) + : +- ReusedExchange (81) + +- ReusedExchange (84) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_bill_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(5) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9, d_year#10] + +(6) CometFilter +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(7) CometProject +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Arguments: [d_date_sk#8, d_date#9], [d_date_sk#8, d_date#9] + +(8) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: [d_date_sk#8, d_date#9] + +(9) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_date#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(10) CometProject +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] +Arguments: [ss_item_sk#6, d_date#9], [ss_item_sk#6, d_date#9] + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(12) CometFilter +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(13) CometBroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [i_item_sk#11, i_item_desc#12] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#6, d_date#9] +Right output [2]: [i_item_sk#11, i_item_desc#12] +Arguments: [ss_item_sk#6], [i_item_sk#11], Inner, BuildRight + +(15) CometProject +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] +Arguments: [d_date#9, i_item_sk#11, _groupingexpression#13], [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] + +(16) CometHashAggregate +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] + +(17) CometExchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#14] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] + +(19) CometFilter +Input [2]: [item_sk#15, cnt#16] +Condition : (cnt#16 > 4) + +(20) CometProject +Input [2]: [item_sk#15, cnt#16] +Arguments: [item_sk#15], [item_sk#15] + +(21) CometBroadcastExchange +Input [1]: [item_sk#15] +Arguments: [item_sk#15] + +(22) CometBroadcastHashJoin +Left output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Right output [1]: [item_sk#15] +Arguments: [cs_item_sk#2], [item_sk#15], LeftSemi, BuildRight + +(23) CometProject +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(24) CometExchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(25) CometSort +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5], [cs_bill_customer_sk#1 ASC NULLS FIRST] + +(26) ColumnarToRow [codegen id : 1] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(27) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] + +(28) CometFilter +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Condition : isnotnull(ss_customer_sk#17) + +(29) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, ss_sold_date_sk#20] +Arguments: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19], [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(31) CometFilter +Input [1]: [c_customer_sk#21] +Condition : isnotnull(c_customer_sk#21) + +(32) CometBroadcastExchange +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21] + +(33) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19] +Right output [1]: [c_customer_sk#21] +Arguments: [ss_customer_sk#17], [c_customer_sk#21], Inner, BuildRight + +(34) CometProject +Input [4]: [ss_customer_sk#17, ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Arguments: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21], [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(35) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] + +(36) HashAggregate [codegen id : 2] +Input [3]: [ss_quantity#18, ss_sales_price#19, c_customer_sk#21] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] +Aggregate Attributes [2]: [sum#22, isEmpty#23] +Results [3]: [c_customer_sk#21, sum#24, isEmpty#25] + +(37) Exchange +Input [3]: [c_customer_sk#21, sum#24, isEmpty#25] +Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(38) HashAggregate [codegen id : 3] +Input [3]: [c_customer_sk#21, sum#24, isEmpty#25] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))#26] +Results [2]: [c_customer_sk#21, sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))#26 AS ssales#27] + +(39) Filter [codegen id : 3] +Input [2]: [c_customer_sk#21, ssales#27] +Condition : (isnotnull(ssales#27) AND (cast(ssales#27 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#28, [id=#29]))) + +(40) Project [codegen id : 3] +Output [1]: [c_customer_sk#21] +Input [2]: [c_customer_sk#21, ssales#27] + +(41) Sort [codegen id : 3] +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 + +(42) SortMergeJoin [codegen id : 9] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#21] +Join type: LeftSemi +Join condition: None + +(43) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: [c_customer_sk#30, c_first_name#31, c_last_name#32] + +(44) CometFilter +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Condition : isnotnull(c_customer_sk#30) + +(45) CometExchange +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: hashpartitioning(c_customer_sk#30, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(46) CometSort +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: [c_customer_sk#30, c_first_name#31, c_last_name#32], [c_customer_sk#30 ASC NULLS FIRST] + +(47) ColumnarToRow [codegen id : 4] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] + +(48) ReusedExchange [Reuses operator id: 37] +Output [3]: [c_customer_sk#21, sum#24, isEmpty#25] + +(49) HashAggregate [codegen id : 6] +Input [3]: [c_customer_sk#21, sum#24, isEmpty#25] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))#26] +Results [2]: [c_customer_sk#21, sum((cast(ss_quantity#18 as decimal(10,0)) * ss_sales_price#19))#26 AS ssales#27] + +(50) Filter [codegen id : 6] +Input [2]: [c_customer_sk#21, ssales#27] +Condition : (isnotnull(ssales#27) AND (cast(ssales#27 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#28, [id=#29]))) + +(51) Project [codegen id : 6] +Output [1]: [c_customer_sk#21] +Input [2]: [c_customer_sk#21, ssales#27] + +(52) Sort [codegen id : 6] +Input [1]: [c_customer_sk#21] +Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#30] +Right keys [1]: [c_customer_sk#21] +Join type: LeftSemi +Join condition: None + +(54) BroadcastExchange +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(55) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#30] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 9] +Output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#31, c_last_name#32] +Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#30, c_first_name#31, c_last_name#32] + +(57) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Arguments: [d_date_sk#33, d_year#34, d_moy#35] + +(58) CometFilter +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2000)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(59) CometProject +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Arguments: [d_date_sk#33], [d_date_sk#33] + +(60) ColumnarToRow [codegen id : 8] +Input [1]: [d_date_sk#33] + +(61) BroadcastExchange +Input [1]: [d_date_sk#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(62) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 9] +Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#31, c_last_name#32] +Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#31, c_last_name#32, d_date_sk#33] + +(64) HashAggregate [codegen id : 9] +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#31, c_last_name#32] +Keys [2]: [c_last_name#32, c_first_name#31] +Functions [1]: [partial_sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [4]: [c_last_name#32, c_first_name#31, sum#38, isEmpty#39] + +(65) Exchange +Input [4]: [c_last_name#32, c_first_name#31, sum#38, isEmpty#39] +Arguments: hashpartitioning(c_last_name#32, c_first_name#31, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(66) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#32, c_first_name#31, sum#38, isEmpty#39] +Keys [2]: [c_last_name#32, c_first_name#31] +Functions [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] +Aggregate Attributes [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))#40] +Results [3]: [c_last_name#32, c_first_name#31, sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))#40 AS sales#41] + +(67) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [5]: [ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] +Arguments: [ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] + +(68) CometFilter +Input [5]: [ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] +Condition : isnotnull(ws_bill_customer_sk#43) + +(69) ReusedExchange [Reuses operator id: 21] +Output [1]: [item_sk#47] + +(70) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] +Right output [1]: [item_sk#47] +Arguments: [ws_item_sk#42], [item_sk#47], LeftSemi, BuildRight + +(71) CometProject +Input [5]: [ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] +Arguments: [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46], [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] + +(72) CometExchange +Input [4]: [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] +Arguments: hashpartitioning(ws_bill_customer_sk#43, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(73) CometSort +Input [4]: [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] +Arguments: [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46], [ws_bill_customer_sk#43 ASC NULLS FIRST] + +(74) ColumnarToRow [codegen id : 11] +Input [4]: [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46] + +(75) ReusedExchange [Reuses operator id: 37] +Output [3]: [c_customer_sk#48, sum#49, isEmpty#50] + +(76) HashAggregate [codegen id : 13] +Input [3]: [c_customer_sk#48, sum#49, isEmpty#50] +Keys [1]: [c_customer_sk#48] +Functions [1]: [sum((cast(ss_quantity#51 as decimal(10,0)) * ss_sales_price#52))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#51 as decimal(10,0)) * ss_sales_price#52))#26] +Results [2]: [c_customer_sk#48, sum((cast(ss_quantity#51 as decimal(10,0)) * ss_sales_price#52))#26 AS ssales#53] + +(77) Filter [codegen id : 13] +Input [2]: [c_customer_sk#48, ssales#53] +Condition : (isnotnull(ssales#53) AND (cast(ssales#53 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#28, [id=#29]))) + +(78) Project [codegen id : 13] +Output [1]: [c_customer_sk#48] +Input [2]: [c_customer_sk#48, ssales#53] + +(79) Sort [codegen id : 13] +Input [1]: [c_customer_sk#48] +Arguments: [c_customer_sk#48 ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin [codegen id : 19] +Left keys [1]: [ws_bill_customer_sk#43] +Right keys [1]: [c_customer_sk#48] +Join type: LeftSemi +Join condition: None + +(81) ReusedExchange [Reuses operator id: 54] +Output [3]: [c_customer_sk#54, c_first_name#55, c_last_name#56] + +(82) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_bill_customer_sk#43] +Right keys [1]: [c_customer_sk#54] +Join type: Inner +Join condition: None + +(83) Project [codegen id : 19] +Output [5]: [ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46, c_first_name#55, c_last_name#56] +Input [7]: [ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46, c_customer_sk#54, c_first_name#55, c_last_name#56] + +(84) ReusedExchange [Reuses operator id: 61] +Output [1]: [d_date_sk#57] + +(85) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ws_sold_date_sk#46] +Right keys [1]: [d_date_sk#57] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 19] +Output [4]: [ws_quantity#44, ws_list_price#45, c_first_name#55, c_last_name#56] +Input [6]: [ws_quantity#44, ws_list_price#45, ws_sold_date_sk#46, c_first_name#55, c_last_name#56, d_date_sk#57] + +(87) HashAggregate [codegen id : 19] +Input [4]: [ws_quantity#44, ws_list_price#45, c_first_name#55, c_last_name#56] +Keys [2]: [c_last_name#56, c_first_name#55] +Functions [1]: [partial_sum((cast(ws_quantity#44 as decimal(10,0)) * ws_list_price#45))] +Aggregate Attributes [2]: [sum#58, isEmpty#59] +Results [4]: [c_last_name#56, c_first_name#55, sum#60, isEmpty#61] + +(88) Exchange +Input [4]: [c_last_name#56, c_first_name#55, sum#60, isEmpty#61] +Arguments: hashpartitioning(c_last_name#56, c_first_name#55, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(89) HashAggregate [codegen id : 20] +Input [4]: [c_last_name#56, c_first_name#55, sum#60, isEmpty#61] +Keys [2]: [c_last_name#56, c_first_name#55] +Functions [1]: [sum((cast(ws_quantity#44 as decimal(10,0)) * ws_list_price#45))] +Aggregate Attributes [1]: [sum((cast(ws_quantity#44 as decimal(10,0)) * ws_list_price#45))#62] +Results [3]: [c_last_name#56, c_first_name#55, sum((cast(ws_quantity#44 as decimal(10,0)) * ws_list_price#45))#62 AS sales#63] + +(90) Union + +(91) TakeOrderedAndProject +Input [3]: [c_last_name#32, c_first_name#31, sales#41] +Arguments: 100, [c_last_name#32 ASC NULLS FIRST, c_first_name#31 ASC NULLS FIRST, sales#41 ASC NULLS FIRST], [c_last_name#32, c_first_name#31, sales#41] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 39 Hosting Expression = Subquery scalar-subquery#28, [id=#29] +* HashAggregate (109) ++- Exchange (108) + +- * HashAggregate (107) + +- * HashAggregate (106) + +- Exchange (105) + +- * HashAggregate (104) + +- * ColumnarToRow (103) + +- CometProject (102) + +- CometBroadcastHashJoin (101) + :- CometProject (96) + : +- CometBroadcastHashJoin (95) + : :- CometFilter (93) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (92) + : +- ReusedExchange (94) + +- CometBroadcastExchange (100) + +- CometProject (99) + +- CometFilter (98) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (97) + + +(92) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#64, ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67] +Arguments: [ss_customer_sk#64, ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67] + +(93) CometFilter +Input [4]: [ss_customer_sk#64, ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67] +Condition : isnotnull(ss_customer_sk#64) + +(94) ReusedExchange [Reuses operator id: 32] +Output [1]: [c_customer_sk#68] + +(95) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#64, ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67] +Right output [1]: [c_customer_sk#68] +Arguments: [ss_customer_sk#64], [c_customer_sk#68], Inner, BuildRight + +(96) CometProject +Input [5]: [ss_customer_sk#64, ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67, c_customer_sk#68] +Arguments: [ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67, c_customer_sk#68], [ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67, c_customer_sk#68] + +(97) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#69, d_year#70] +Arguments: [d_date_sk#69, d_year#70] + +(98) CometFilter +Input [2]: [d_date_sk#69, d_year#70] +Condition : (d_year#70 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#69)) + +(99) CometProject +Input [2]: [d_date_sk#69, d_year#70] +Arguments: [d_date_sk#69], [d_date_sk#69] + +(100) CometBroadcastExchange +Input [1]: [d_date_sk#69] +Arguments: [d_date_sk#69] + +(101) CometBroadcastHashJoin +Left output [4]: [ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67, c_customer_sk#68] +Right output [1]: [d_date_sk#69] +Arguments: [ss_sold_date_sk#67], [d_date_sk#69], Inner, BuildRight + +(102) CometProject +Input [5]: [ss_quantity#65, ss_sales_price#66, ss_sold_date_sk#67, c_customer_sk#68, d_date_sk#69] +Arguments: [ss_quantity#65, ss_sales_price#66, c_customer_sk#68], [ss_quantity#65, ss_sales_price#66, c_customer_sk#68] + +(103) ColumnarToRow [codegen id : 1] +Input [3]: [ss_quantity#65, ss_sales_price#66, c_customer_sk#68] + +(104) HashAggregate [codegen id : 1] +Input [3]: [ss_quantity#65, ss_sales_price#66, c_customer_sk#68] +Keys [1]: [c_customer_sk#68] +Functions [1]: [partial_sum((cast(ss_quantity#65 as decimal(10,0)) * ss_sales_price#66))] +Aggregate Attributes [2]: [sum#71, isEmpty#72] +Results [3]: [c_customer_sk#68, sum#73, isEmpty#74] + +(105) Exchange +Input [3]: [c_customer_sk#68, sum#73, isEmpty#74] +Arguments: hashpartitioning(c_customer_sk#68, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(106) HashAggregate [codegen id : 2] +Input [3]: [c_customer_sk#68, sum#73, isEmpty#74] +Keys [1]: [c_customer_sk#68] +Functions [1]: [sum((cast(ss_quantity#65 as decimal(10,0)) * ss_sales_price#66))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#65 as decimal(10,0)) * ss_sales_price#66))#75] +Results [1]: [sum((cast(ss_quantity#65 as decimal(10,0)) * ss_sales_price#66))#75 AS csales#76] + +(107) HashAggregate [codegen id : 2] +Input [1]: [csales#76] +Keys: [] +Functions [1]: [partial_max(csales#76)] +Aggregate Attributes [1]: [max#77] +Results [1]: [max#78] + +(108) Exchange +Input [1]: [max#78] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(109) HashAggregate [codegen id : 3] +Input [1]: [max#78] +Keys: [] +Functions [1]: [max(csales#76)] +Aggregate Attributes [1]: [max(csales#76)#79] +Results [1]: [max(csales#76)#79 AS tpcds_cmax#80] + +Subquery:2 Hosting operator id = 50 Hosting Expression = ReusedSubquery Subquery scalar-subquery#28, [id=#29] + +Subquery:3 Hosting operator id = 77 Hosting Expression = ReusedSubquery Subquery scalar-subquery#28, [id=#29] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f29c0e5e18 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q23b.native_datafusion/simplified.txt @@ -0,0 +1,151 @@ +TakeOrderedAndProject [c_last_name,c_first_name,sales] + Union + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),sales,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name] #1 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,cs_quantity,cs_list_price] [sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_list_price,c_first_name,c_last_name] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometExchange [cs_bill_customer_sk] #2 + CometProject [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,item_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [item_sk] #3 + CometProject [item_sk] + CometFilter [item_sk,cnt] + CometHashAggregate [item_sk,cnt,_groupingexpression,i_item_sk,d_date,count,count(1)] + CometExchange [_groupingexpression,i_item_sk,d_date] #4 + CometHashAggregate [_groupingexpression,i_item_sk,d_date,count] + CometProject [i_item_desc] [d_date,i_item_sk,_groupingexpression] + CometBroadcastHashJoin [ss_item_sk,d_date,i_item_sk,i_item_desc] + CometProject [ss_item_sk,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_year] + CometBroadcastExchange [i_item_sk,i_item_desc] #6 + CometFilter [i_item_sk,i_item_desc] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_desc] + InputAdapter + WholeStageCodegen (3) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + Subquery #1 + WholeStageCodegen (3) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (2) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (1) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk,d_date_sk] + CometProject [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + ReusedExchange [c_customer_sk] #8 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (2) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_sales_price,c_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,c_customer_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price] + CometFilter [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [c_customer_sk] #8 + CometFilter [c_customer_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (7) + SortMergeJoin [c_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [c_customer_sk,c_first_name,c_last_name] + CometExchange [c_customer_sk] #13 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (6) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (8) + ColumnarToRow + InputAdapter + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + WholeStageCodegen (20) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),sales,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name] #15 + WholeStageCodegen (19) + HashAggregate [c_last_name,c_first_name,ws_quantity,ws_list_price] [sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_list_price,c_first_name,c_last_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (11) + ColumnarToRow + InputAdapter + CometSort [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometExchange [ws_bill_customer_sk] #16 + CometProject [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk,item_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [item_sk] #3 + InputAdapter + WholeStageCodegen (13) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #12 + InputAdapter + ReusedExchange [d_date_sk] #14 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/explain.txt new file mode 100644 index 0000000000..5d63cafbe5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/explain.txt @@ -0,0 +1,382 @@ +== Physical Plan == +* Filter (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(25) CometFilter +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(26) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(29) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [ca_state#25, ca_zip#26, ca_country#27] + +(31) CometFilter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(32) ColumnarToRow [codegen id : 1] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(33) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(37) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(40) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (69) ++- Exchange (68) + +- * HashAggregate (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometProject (48) + : : : : +- CometSortMergeJoin (47) + : : : : :- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- CometSort (46) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (49) + : : +- CometBroadcastExchange (54) + : : +- CometFilter (53) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (52) + : +- ReusedExchange (57) + +- ReusedExchange (61) + + +(43) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] + +(44) CometSort +Input [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44], [ss_ticket_number#43 ASC NULLS FIRST, ss_item_sk#40 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#45, sr_ticket_number#46] + +(46) CometSort +Input [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [sr_item_sk#45, sr_ticket_number#46], [sr_ticket_number#46 ASC NULLS FIRST, sr_item_sk#45 ASC NULLS FIRST] + +(47) CometSortMergeJoin +Left output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Right output [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_ticket_number#43, ss_item_sk#40], [sr_ticket_number#46, sr_item_sk#45], Inner + +(48) CometProject +Input [7]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44, sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44], [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] + +(49) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] + +(50) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] +Right output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_store_sk#42], [s_store_sk#47], Inner, BuildRight + +(51) CometProject +Input [8]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44, s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50], [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] + +(52) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(53) CometFilter +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Condition : isnotnull(i_item_sk#51) + +(54) CometBroadcastExchange +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(55) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] +Right output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_item_sk#40], [i_item_sk#51], Inner, BuildRight + +(56) CometProject +Input [12]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56], [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(57) ReusedExchange [Reuses operator id: 26] +Output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] + +(58) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Right output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_customer_sk#41], [c_customer_sk#57], Inner, BuildRight + +(59) CometProject +Input [14]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60], [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(60) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(61) ReusedExchange [Reuses operator id: 33] +Output [3]: [ca_state#61, ca_zip#62, ca_country#63] + +(62) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#60, s_zip#50] +Right keys [2]: [upper(ca_country#63), ca_zip#62] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 2] +Output [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Input [15]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60, ca_state#61, ca_zip#62, ca_country#63] + +(64) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum#64] +Results [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] + +(65) Exchange +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Arguments: hashpartitioning(c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(66) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#44))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#44))#30,17,2) AS netpaid#66] + +(67) HashAggregate [codegen id : 3] +Input [1]: [netpaid#66] +Keys: [] +Functions [1]: [partial_avg(netpaid#66)] +Aggregate Attributes [2]: [sum#67, count#68] +Results [2]: [sum#69, count#70] + +(68) Exchange +Input [2]: [sum#69, count#70] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 4] +Input [2]: [sum#69, count#70] +Keys: [] +Functions [1]: [avg(netpaid#66)] +Aggregate Attributes [1]: [avg(netpaid#66)#71] +Results [1]: [(0.05 * avg(netpaid#66)#71) AS (0.05 * avg(netpaid))#72] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c1bf41667f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24a.native_datafusion/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #11 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #4 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #6 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + CometFilter [c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_state,ca_zip,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/explain.txt new file mode 100644 index 0000000000..829f96c1fb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/explain.txt @@ -0,0 +1,382 @@ +== Physical Plan == +* Filter (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(25) CometFilter +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(26) CometBroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(29) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [ca_state#25, ca_zip#26, ca_country#27] + +(31) CometFilter +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(32) ColumnarToRow [codegen id : 1] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(33) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(37) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(40) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (69) ++- Exchange (68) + +- * HashAggregate (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * ColumnarToRow (60) + : +- CometProject (59) + : +- CometBroadcastHashJoin (58) + : :- CometProject (56) + : : +- CometBroadcastHashJoin (55) + : : :- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometProject (48) + : : : : +- CometSortMergeJoin (47) + : : : : :- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- CometSort (46) + : : : : +- ReusedExchange (45) + : : : +- ReusedExchange (49) + : : +- CometBroadcastExchange (54) + : : +- CometFilter (53) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (52) + : +- ReusedExchange (57) + +- ReusedExchange (61) + + +(43) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] + +(44) CometSort +Input [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44], [ss_ticket_number#43 ASC NULLS FIRST, ss_item_sk#40 ASC NULLS FIRST] + +(45) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#45, sr_ticket_number#46] + +(46) CometSort +Input [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [sr_item_sk#45, sr_ticket_number#46], [sr_ticket_number#46 ASC NULLS FIRST, sr_item_sk#45 ASC NULLS FIRST] + +(47) CometSortMergeJoin +Left output [5]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44] +Right output [2]: [sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_ticket_number#43, ss_item_sk#40], [sr_ticket_number#46, sr_item_sk#45], Inner + +(48) CometProject +Input [7]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_ticket_number#43, ss_net_paid#44, sr_item_sk#45, sr_ticket_number#46] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44], [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] + +(49) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] + +(50) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44] +Right output [4]: [s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_store_sk#42], [s_store_sk#47], Inner, BuildRight + +(51) CometProject +Input [8]: [ss_item_sk#40, ss_customer_sk#41, ss_store_sk#42, ss_net_paid#44, s_store_sk#47, s_store_name#48, s_state#49, s_zip#50] +Arguments: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50], [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] + +(52) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(53) CometFilter +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Condition : isnotnull(i_item_sk#51) + +(54) CometBroadcastExchange +Input [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(55) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50] +Right output [6]: [i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_item_sk#40], [i_item_sk#51], Inner, BuildRight + +(56) CometProject +Input [12]: [ss_item_sk#40, ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_item_sk#51, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Arguments: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56], [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] + +(57) ReusedExchange [Reuses operator id: 26] +Output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] + +(58) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56] +Right output [4]: [c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_customer_sk#41], [c_customer_sk#57], Inner, BuildRight + +(59) CometProject +Input [14]: [ss_customer_sk#41, ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_customer_sk#57, c_first_name#58, c_last_name#59, c_birth_country#60] +Arguments: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60], [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(60) ColumnarToRow [codegen id : 2] +Input [12]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60] + +(61) ReusedExchange [Reuses operator id: 33] +Output [3]: [ca_state#61, ca_zip#62, ca_country#63] + +(62) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [c_birth_country#60, s_zip#50] +Right keys [2]: [upper(ca_country#63), ca_zip#62] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 2] +Output [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Input [15]: [ss_net_paid#44, s_store_name#48, s_state#49, s_zip#50, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, c_birth_country#60, ca_state#61, ca_zip#62, ca_country#63] + +(64) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#44, s_store_name#48, s_state#49, i_current_price#52, i_size#53, i_color#54, i_units#55, i_manager_id#56, c_first_name#58, c_last_name#59, ca_state#61] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum#64] +Results [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] + +(65) Exchange +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Arguments: hashpartitioning(c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(66) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53, sum#65] +Keys [10]: [c_last_name#59, c_first_name#58, s_store_name#48, ca_state#61, s_state#49, i_color#54, i_current_price#52, i_manager_id#56, i_units#55, i_size#53] +Functions [1]: [sum(UnscaledValue(ss_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#44))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#44))#30,17,2) AS netpaid#66] + +(67) HashAggregate [codegen id : 3] +Input [1]: [netpaid#66] +Keys: [] +Functions [1]: [partial_avg(netpaid#66)] +Aggregate Attributes [2]: [sum#67, count#68] +Results [2]: [sum#69, count#70] + +(68) Exchange +Input [2]: [sum#69, count#70] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 4] +Input [2]: [sum#69, count#70] +Keys: [] +Functions [1]: [avg(netpaid#66)] +Aggregate Attributes [1]: [avg(netpaid#66)#71] +Results [1]: [(0.05 * avg(netpaid#66)#71) AS (0.05 * avg(netpaid))#72] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c1bf41667f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q24b.native_datafusion/simplified.txt @@ -0,0 +1,86 @@ +WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #11 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #4 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #6 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + CometFilter [c_customer_sk,c_first_name,c_last_name,c_birth_country] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_state,ca_zip,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/explain.txt new file mode 100644 index 0000000000..9bd1ec11cd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/explain.txt @@ -0,0 +1,213 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * ColumnarToRow (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : +- ReusedExchange (25) + : +- CometBroadcastExchange (30) + : +- CometFilter (29) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (28) + +- ReusedExchange (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] + +(8) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16, d_year#17, d_moy#18] + +(14) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 4)) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19, d_year#20, d_moy#21] + +(20) CometFilter +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 4)) AND (d_moy#21 <= 10)) AND (d_year#20 = 2001)) AND isnotnull(d_date_sk#19)) + +(21) CometProject +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#19] +Arguments: [sr_returned_date_sk#11], [d_date_sk#19], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#19] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] + +(25) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#22] + +(26) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#15], [d_date_sk#22], Inner, BuildRight + +(27) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#22] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14], [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] + +(28) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(29) CometFilter +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Condition : isnotnull(s_store_sk#23) + +(30) CometBroadcastExchange +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(31) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] +Right output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [ss_store_sk#3], [s_store_sk#23], Inner, BuildRight + +(32) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25], [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] + +(33) ReusedExchange [Reuses operator id: 30] +Output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] + +(34) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] +Right output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [ss_item_sk#1], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28], [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] + +(36) ColumnarToRow [codegen id : 1] +Input [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] + +(37) HashAggregate [codegen id : 1] +Input [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#10)), partial_sum(UnscaledValue(cs_net_profit#14))] +Aggregate Attributes [3]: [sum#29, sum#30, sum#31] +Results [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] + +(38) Exchange +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(39) HashAggregate [codegen id : 2] +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#10)), sum(UnscaledValue(cs_net_profit#14))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#35, sum(UnscaledValue(sr_net_loss#10))#36, sum(UnscaledValue(cs_net_profit#14))#37] +Results [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#35,17,2) AS store_sales_profit#38, MakeDecimal(sum(UnscaledValue(sr_net_loss#10))#36,17,2) AS store_returns_loss#39, MakeDecimal(sum(UnscaledValue(cs_net_profit#14))#37,17,2) AS catalog_sales_profit#40] + +(40) TakeOrderedAndProject +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#38, store_returns_loss#39, catalog_sales_profit#40] +Arguments: 100, [i_item_id#27 ASC NULLS FIRST, i_item_desc#28 ASC NULLS FIRST, s_store_id#24 ASC NULLS FIRST, s_store_name#25 ASC NULLS FIRST], [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#38, store_returns_loss#39, catalog_sales_profit#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d51ec29fe4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q25.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] + WholeStageCodegen (2) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit)),store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_sk,s_store_id,s_store_name] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + ReusedExchange [d_date_sk] #5 + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #6 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name] + ReusedExchange [i_item_sk,i_item_id,i_item_desc] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/explain.txt new file mode 100644 index 0000000000..6edda09877 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/explain.txt @@ -0,0 +1,162 @@ +== Physical Plan == +TakeOrderedAndProject (30) ++- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * ColumnarToRow (26) + +- CometProject (25) + +- CometBroadcastHashJoin (24) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + +- CometBroadcastExchange (23) + +- CometProject (22) + +- CometFilter (21) + +- CometNativeScan: `spark_catalog`.`default`.`promotion` (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Arguments: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_bill_cdemo_sk#1) AND isnotnull(cs_item_sk#2)) AND isnotnull(cs_promo_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [cs_bill_cdemo_sk#1], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#9] +Arguments: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8], [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [cs_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#13] +Arguments: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7], [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(16) CometFilter +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(17) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(18) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] +Right output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [cs_item_sk#2], [i_item_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#15, i_item_id#16] +Arguments: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16], [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(21) CometFilter +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(22) CometProject +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17], [p_promo_sk#17] + +(23) CometBroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: [p_promo_sk#17] + +(24) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Right output [1]: [p_promo_sk#17] +Arguments: [cs_promo_sk#3], [p_promo_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16, p_promo_sk#17] +Arguments: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(26) ColumnarToRow [codegen id : 1] +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] + +(27) HashAggregate [codegen id : 1] +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] + +(28) Exchange +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(29) HashAggregate [codegen id : 2] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [4]: [avg(cs_quantity#4)#36, avg(UnscaledValue(cs_list_price#5))#37, avg(UnscaledValue(cs_coupon_amt#7))#38, avg(UnscaledValue(cs_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(cs_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(cs_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(cs_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(cs_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] + +(30) TakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/simplified.txt new file mode 100644 index 0000000000..39633af4ee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q26.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (2) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cs_quantity),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + CometBroadcastHashJoin [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id,p_promo_sk] + CometProject [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk,d_date_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometBroadcastHashJoin [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk,cd_demo_sk] + CometFilter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #4 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [p_promo_sk] #5 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_email,p_channel_event] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/explain.txt new file mode 100644 index 0000000000..df9ba7f4b5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/explain.txt @@ -0,0 +1,162 @@ +== Physical Plan == +TakeOrderedAndProject (30) ++- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * ColumnarToRow (26) + +- CometExpand (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + +- CometBroadcastExchange (22) + +- CometFilter (21) + +- CometNativeScan: `spark_catalog`.`default`.`item` (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(16) CometFilter +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [s_store_sk#15, s_state#16] +Arguments: [ss_store_sk#3], [s_store_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] +Arguments: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16], [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(21) CometFilter +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Right output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] +Arguments: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] + +(25) CometExpand +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] + +(26) ColumnarToRow [codegen id : 1] +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] + +(27) HashAggregate [codegen id : 1] +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [8]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Results [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] + +(28) Exchange +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(29) HashAggregate [codegen id : 2] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [4]: [avg(ss_quantity#4)#38, avg(UnscaledValue(ss_list_price#5))#39, avg(UnscaledValue(ss_coupon_amt#7))#40, avg(UnscaledValue(ss_sales_price#6))#41] +Results [7]: [i_item_id#19, s_state#20, cast((shiftright(spark_grouping_id#21, 0) & 1) as tinyint) AS g_state#42, avg(ss_quantity#4)#38 AS agg1#43, cast((avg(UnscaledValue(ss_list_price#5))#39 / 100.0) as decimal(11,6)) AS agg2#44, cast((avg(UnscaledValue(ss_coupon_amt#7))#40 / 100.0) as decimal(11,6)) AS agg3#45, cast((avg(UnscaledValue(ss_sales_price#6))#41 / 100.0) as decimal(11,6)) AS agg4#46] + +(30) TakeOrderedAndProject +Input [7]: [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e39b06308f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q27.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + WholeStageCodegen (2) + HashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count] [avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,s_state,spark_grouping_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometExpand [i_item_id,s_state] [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state,spark_grouping_id] + CometProject [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_state] #4 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/explain.txt new file mode 100644 index 0000000000..a917d8262b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/explain.txt @@ -0,0 +1,419 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (70) +:- * BroadcastNestedLoopJoin Inner BuildRight (58) +: :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * ColumnarToRow (4) +: : : : : +- CometProject (3) +: : : : : +- CometFilter (2) +: : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * ColumnarToRow (14) +: : : : +- CometProject (13) +: : : : +- CometFilter (12) +: : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * ColumnarToRow (26) +: : : +- CometProject (25) +: : : +- CometFilter (24) +: : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * ColumnarToRow (38) +: : +- CometProject (37) +: : +- CometFilter (36) +: : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * ColumnarToRow (50) +: +- CometProject (49) +: +- CometFilter (48) +: +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * ColumnarToRow (62) + +- CometProject (61) + +- CometFilter (60) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (59) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Arguments: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) + +(3) CometProject +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Arguments: [ss_list_price#3], [ss_list_price#3] + +(4) ColumnarToRow [codegen id : 1] +Input [1]: [ss_list_price#3] + +(5) HashAggregate [codegen id : 1] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7] +Results [4]: [ss_list_price#3, sum#8, count#9, count#10] + +(6) Exchange +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(7) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7] +Results [4]: [ss_list_price#3, sum#8, count#9, count#10] + +(8) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [4]: [sum#8, count#9, count#10, count#12] + +(9) Exchange +Input [4]: [sum#8, count#9, count#10, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) HashAggregate [codegen id : 18] +Input [4]: [sum#8, count#9, count#10, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#7 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] + +(11) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Arguments: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] + +(12) CometFilter +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) + +(13) CometProject +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Arguments: [ss_list_price#18], [ss_list_price#18] + +(14) ColumnarToRow [codegen id : 3] +Input [1]: [ss_list_price#18] + +(15) HashAggregate [codegen id : 3] +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] + +(16) Exchange +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(17) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] + +(18) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [4]: [sum#23, count#24, count#25, count#27] + +(19) Exchange +Input [4]: [sum#23, count#24, count#25, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(20) HashAggregate [codegen id : 5] +Input [4]: [sum#23, count#24, count#25, count#27] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#21 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#22 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] + +(21) BroadcastExchange +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(22) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(23) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Arguments: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] + +(24) CometFilter +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) + +(25) CometProject +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Arguments: [ss_list_price#33], [ss_list_price#33] + +(26) ColumnarToRow [codegen id : 6] +Input [1]: [ss_list_price#33] + +(27) HashAggregate [codegen id : 6] +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] + +(28) Exchange +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(29) HashAggregate [codegen id : 7] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] + +(30) HashAggregate [codegen id : 7] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [4]: [sum#38, count#39, count#40, count#42] + +(31) Exchange +Input [4]: [sum#38, count#39, count#40, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(32) HashAggregate [codegen id : 8] +Input [4]: [sum#38, count#39, count#40, count#42] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#36 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#37 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] + +(33) BroadcastExchange +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] + +(34) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(35) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Arguments: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] + +(36) CometFilter +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) + +(37) CometProject +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Arguments: [ss_list_price#48], [ss_list_price#48] + +(38) ColumnarToRow [codegen id : 9] +Input [1]: [ss_list_price#48] + +(39) HashAggregate [codegen id : 9] +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] + +(40) Exchange +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(41) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] + +(42) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [4]: [sum#53, count#54, count#55, count#57] + +(43) Exchange +Input [4]: [sum#53, count#54, count#55, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(44) HashAggregate [codegen id : 11] +Input [4]: [sum#53, count#54, count#55, count#57] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#51 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#52 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] + +(45) BroadcastExchange +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(46) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(47) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Arguments: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] + +(48) CometFilter +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) + +(49) CometProject +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Arguments: [ss_list_price#63], [ss_list_price#63] + +(50) ColumnarToRow [codegen id : 12] +Input [1]: [ss_list_price#63] + +(51) HashAggregate [codegen id : 12] +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] + +(52) Exchange +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(53) HashAggregate [codegen id : 13] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] + +(54) HashAggregate [codegen id : 13] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [4]: [sum#68, count#69, count#70, count#72] + +(55) Exchange +Input [4]: [sum#68, count#69, count#70, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(56) HashAggregate [codegen id : 14] +Input [4]: [sum#68, count#69, count#70, count#72] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#66 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#67 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] + +(57) BroadcastExchange +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] + +(58) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(59) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Arguments: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] + +(60) CometFilter +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) + +(61) CometProject +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Arguments: [ss_list_price#78], [ss_list_price#78] + +(62) ColumnarToRow [codegen id : 15] +Input [1]: [ss_list_price#78] + +(63) HashAggregate [codegen id : 15] +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] + +(64) Exchange +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(65) HashAggregate [codegen id : 16] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] + +(66) HashAggregate [codegen id : 16] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [4]: [sum#83, count#84, count#85, count#87] + +(67) Exchange +Input [4]: [sum#83, count#84, count#85, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(68) HashAggregate [codegen id : 17] +Input [4]: [sum#83, count#84, count#85, count#87] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#81 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#82 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] + +(69) BroadcastExchange +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] + +(70) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4a969c97db --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q28.native_datafusion/simplified.txt @@ -0,0 +1,111 @@ +WholeStageCodegen (18) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (5) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + InputAdapter + Exchange #4 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #5 + WholeStageCodegen (3) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + InputAdapter + Exchange #7 + WholeStageCodegen (7) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #8 + WholeStageCodegen (6) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + InputAdapter + Exchange #10 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #11 + WholeStageCodegen (9) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (14) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + InputAdapter + Exchange #13 + WholeStageCodegen (13) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #14 + WholeStageCodegen (12) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (17) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (15) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + ColumnarToRow + InputAdapter + CometProject [ss_list_price] + CometFilter [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/explain.txt new file mode 100644 index 0000000000..37387c2435 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/explain.txt @@ -0,0 +1,229 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * ColumnarToRow (39) + +- CometProject (38) + +- CometBroadcastHashJoin (37) + :- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (8) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometProject (15) + : : : : +- CometFilter (14) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : : : +- CometBroadcastExchange (22) + : : : +- CometProject (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : +- CometBroadcastExchange (28) + : : +- CometProject (27) + : : +- CometFilter (26) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (25) + : +- CometBroadcastExchange (33) + : +- CometFilter (32) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (31) + +- ReusedExchange (36) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(4) CometFilter +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(5) CometBroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Right output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4], [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9], Inner, BuildRight + +(7) CometProject +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(9) CometFilter +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(10) CometBroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(11) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Right output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [sr_customer_sk#8, sr_item_sk#7], [cs_bill_customer_sk#12, cs_item_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16, d_year#17, d_moy#18] + +(14) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 9)) AND (d_year#17 = 1999)) AND isnotnull(d_date_sk#16)) + +(15) CometProject +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: [d_date_sk#16], [d_date_sk#16] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: [d_date_sk#16] + +(17) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [ss_sold_date_sk#6], [d_date_sk#16], Inner, BuildRight + +(18) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19, d_year#20, d_moy#21] + +(20) CometFilter +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 9)) AND (d_moy#21 <= 12)) AND (d_year#20 = 1999)) AND isnotnull(d_date_sk#19)) + +(21) CometProject +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(23) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#19] +Arguments: [sr_returned_date_sk#11], [d_date_sk#19], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#19] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] + +(25) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22, d_year#23] + +(26) CometFilter +Input [2]: [d_date_sk#22, d_year#23] +Condition : (d_year#23 IN (1999,2000,2001) AND isnotnull(d_date_sk#22)) + +(27) CometProject +Input [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22], [d_date_sk#22] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: [d_date_sk#22] + +(29) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#15], [d_date_sk#22], Inner, BuildRight + +(30) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#22] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14], [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] + +(31) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(32) CometFilter +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Condition : isnotnull(s_store_sk#24) + +(33) CometBroadcastExchange +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(34) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Right output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [ss_store_sk#3], [s_store_sk#24], Inner, BuildRight + +(35) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26], [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] + +(36) ReusedExchange [Reuses operator id: 33] +Output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] + +(37) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] +Right output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [ss_item_sk#1], [i_item_sk#27], Inner, BuildRight + +(38) CometProject +Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29], [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] + +(39) ColumnarToRow [codegen id : 1] +Input [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] + +(40) HashAggregate [codegen id : 1] +Input [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#10), partial_sum(cs_quantity#14)] +Aggregate Attributes [3]: [sum#30, sum#31, sum#32] +Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] + +(41) Exchange +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] +Arguments: hashpartitioning(i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(42) HashAggregate [codegen id : 2] +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#10), sum(cs_quantity#14)] +Aggregate Attributes [3]: [sum(ss_quantity#5)#36, sum(sr_return_quantity#10)#37, sum(cs_quantity#14)#38] +Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum(ss_quantity#5)#36 AS store_sales_quantity#39, sum(sr_return_quantity#10)#37 AS store_returns_quantity#40, sum(cs_quantity#14)#38 AS catalog_sales_quantity#41] + +(43) TakeOrderedAndProject +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#39, store_returns_quantity#40, catalog_sales_quantity#41] +Arguments: 100, [i_item_id#28 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, s_store_name#26 ASC NULLS FIRST], [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#39, store_returns_quantity#40, catalog_sales_quantity#41] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9d86a2fc9f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q29.native_datafusion/simplified.txt @@ -0,0 +1,47 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] + WholeStageCodegen (2) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(ss_quantity),sum(sr_return_quantity),sum(cs_quantity),store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_quantity,sr_return_quantity,cs_quantity] [sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_sk,i_item_id,i_item_desc] + CometProject [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_sk,s_store_id,s_store_name] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] #3 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #7 + CometFilter [s_store_sk,s_store_id,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name] + ReusedExchange [i_item_sk,i_item_id,i_item_desc] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/explain.txt new file mode 100644 index 0000000000..de461c980e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/explain.txt @@ -0,0 +1,105 @@ +== Physical Plan == +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * ColumnarToRow (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 128)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(16) HashAggregate [codegen id : 1] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] + +(17) Exchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(18) HashAggregate [codegen id : 2] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum_agg#16] + +(19) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, sum_agg#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/simplified.txt new file mode 100644 index 0000000000..cbc5dad6b1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q3.native_datafusion/simplified.txt @@ -0,0 +1,23 @@ +TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] + WholeStageCodegen (2) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,sum_agg,sum] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (1) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [d_year,ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/explain.txt new file mode 100644 index 0000000000..26cc3d472f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/explain.txt @@ -0,0 +1,280 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (18) + : : : +- * HashAggregate (17) + : : : +- Exchange (16) + : : : +- * HashAggregate (15) + : : : +- * ColumnarToRow (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * ColumnarToRow (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (23) + : : : +- CometBroadcastHashJoin (22) + : : : :- CometFilter (20) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (19) + : : : +- ReusedExchange (21) + : : +- ReusedExchange (24) + : +- BroadcastExchange (41) + : +- * ColumnarToRow (40) + : +- CometFilter (39) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (38) + +- BroadcastExchange (48) + +- * ColumnarToRow (47) + +- CometProject (46) + +- CometFilter (45) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Arguments: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] + +(2) CometFilter +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : (isnotnull(wr_returning_addr_sk#2) AND isnotnull(wr_returning_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5, d_year#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [wr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] +Arguments: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3], [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(10) CometFilter +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(12) CometBroadcastHashJoin +Left output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Right output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [wr_returning_addr_sk#2], [ca_address_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] +Arguments: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8], [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] + +(14) ColumnarToRow [codegen id : 1] +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] + +(15) HashAggregate [codegen id : 1] +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] + +(16) Exchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(17) HashAggregate [codegen id : 7] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [3]: [wr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] + +(18) Filter [codegen id : 7] +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(19) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [4]: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17, wr_returned_date_sk#18] +Arguments: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17, wr_returned_date_sk#18] + +(20) CometFilter +Input [4]: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17, wr_returned_date_sk#18] +Condition : isnotnull(wr_returning_addr_sk#16) + +(21) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(22) CometBroadcastHashJoin +Left output [4]: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17, wr_returned_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [wr_returned_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(23) CometProject +Input [5]: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17, wr_returned_date_sk#18, d_date_sk#19] +Arguments: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17], [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17] + +(24) ReusedExchange [Reuses operator id: 11] +Output [2]: [ca_address_sk#20, ca_state#21] + +(25) CometBroadcastHashJoin +Left output [3]: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17] +Right output [2]: [ca_address_sk#20, ca_state#21] +Arguments: [wr_returning_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(26) CometProject +Input [5]: [wr_returning_customer_sk#15, wr_returning_addr_sk#16, wr_return_amt#17, ca_address_sk#20, ca_state#21] +Arguments: [wr_returning_customer_sk#15, wr_return_amt#17, ca_state#21], [wr_returning_customer_sk#15, wr_return_amt#17, ca_state#21] + +(27) ColumnarToRow [codegen id : 2] +Input [3]: [wr_returning_customer_sk#15, wr_return_amt#17, ca_state#21] + +(28) HashAggregate [codegen id : 2] +Input [3]: [wr_returning_customer_sk#15, wr_return_amt#17, ca_state#21] +Keys [2]: [wr_returning_customer_sk#15, ca_state#21] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#17))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [wr_returning_customer_sk#15, ca_state#21, sum#23] + +(29) Exchange +Input [3]: [wr_returning_customer_sk#15, ca_state#21, sum#23] +Arguments: hashpartitioning(wr_returning_customer_sk#15, ca_state#21, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(30) HashAggregate [codegen id : 3] +Input [3]: [wr_returning_customer_sk#15, ca_state#21, sum#23] +Keys [2]: [wr_returning_customer_sk#15, ca_state#21] +Functions [1]: [sum(UnscaledValue(wr_return_amt#17))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#17))#11] +Results [2]: [ca_state#21 AS ctr_state#24, MakeDecimal(sum(UnscaledValue(wr_return_amt#17))#11,17,2) AS ctr_total_return#25] + +(31) HashAggregate [codegen id : 3] +Input [2]: [ctr_state#24, ctr_total_return#25] +Keys [1]: [ctr_state#24] +Functions [1]: [partial_avg(ctr_total_return#25)] +Aggregate Attributes [2]: [sum#26, count#27] +Results [3]: [ctr_state#24, sum#28, count#29] + +(32) Exchange +Input [3]: [ctr_state#24, sum#28, count#29] +Arguments: hashpartitioning(ctr_state#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(33) HashAggregate [codegen id : 4] +Input [3]: [ctr_state#24, sum#28, count#29] +Keys [1]: [ctr_state#24] +Functions [1]: [avg(ctr_total_return#25)] +Aggregate Attributes [1]: [avg(ctr_total_return#25)#30] +Results [2]: [(avg(ctr_total_return#25)#30 * 1.2) AS (avg(ctr_total_return) * 1.2)#31, ctr_state#24] + +(34) Filter [codegen id : 4] +Input [2]: [(avg(ctr_total_return) * 1.2)#31, ctr_state#24] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#31) + +(35) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#31, ctr_state#24] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=4] + +(36) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#24] +Join type: Inner +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#31) + +(37) Project [codegen id : 7] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#31, ctr_state#24] + +(38) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [14]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] +Arguments: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] + +(39) CometFilter +Input [14]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] +Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_current_addr_sk#34)) + +(40) ColumnarToRow [codegen id : 5] +Input [14]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] + +(41) BroadcastExchange +Input [14]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(42) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#32] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 7] +Output [14]: [ctr_total_return#14, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] +Input [16]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45] + +(44) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#46, ca_state#47] +Arguments: [ca_address_sk#46, ca_state#47] + +(45) CometFilter +Input [2]: [ca_address_sk#46, ca_state#47] +Condition : ((isnotnull(ca_state#47) AND (ca_state#47 = GA)) AND isnotnull(ca_address_sk#46)) + +(46) CometProject +Input [2]: [ca_address_sk#46, ca_state#47] +Arguments: [ca_address_sk#46], [ca_address_sk#46] + +(47) ColumnarToRow [codegen id : 6] +Input [1]: [ca_address_sk#46] + +(48) BroadcastExchange +Input [1]: [ca_address_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(49) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#46] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 7] +Output [13]: [c_customer_id#33, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45, ctr_total_return#14] +Input [15]: [ctr_total_return#14, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45, ca_address_sk#46] + +(51) TakeOrderedAndProject +Input [13]: [c_customer_id#33, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45, ctr_total_return#14] +Arguments: 100, [c_customer_id#33 ASC NULLS FIRST, c_salutation#35 ASC NULLS FIRST, c_first_name#36 ASC NULLS FIRST, c_last_name#37 ASC NULLS FIRST, c_preferred_cust_flag#38 ASC NULLS FIRST, c_birth_day#39 ASC NULLS FIRST, c_birth_month#40 ASC NULLS FIRST, c_birth_year#41 ASC NULLS FIRST, c_birth_country#42 ASC NULLS FIRST, c_login#43 ASC NULLS FIRST, c_email_address#44 ASC NULLS FIRST, c_last_review_date#45 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#33, c_salutation#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_day#39, c_birth_month#40, c_birth_year#41, c_birth_country#42, c_login#43, c_email_address#44, c_last_review_date#45, ctr_total_return#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/simplified.txt new file mode 100644 index 0000000000..cf0645ac36 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q30.native_datafusion/simplified.txt @@ -0,0 +1,68 @@ +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + WholeStageCodegen (7) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (1) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [wr_returning_customer_sk,wr_return_amt,ca_state] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,ca_address_sk,ca_state] + CometProject [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk,d_date_sk] + CometFilter [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),sum,count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (3) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (2) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [wr_returning_customer_sk,wr_return_amt,ca_state] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,ca_address_sk,ca_state] + CometProject [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + CometBroadcastHashJoin [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk,d_date_sk] + CometFilter [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/explain.txt new file mode 100644 index 0000000000..0167b4b1a4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/explain.txt @@ -0,0 +1,341 @@ +== Physical Plan == +* ColumnarToRow (62) ++- CometSort (61) + +- CometColumnarExchange (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : :- * BroadcastHashJoin Inner BuildRight (32) + : : : : :- * HashAggregate (16) + : : : : : +- Exchange (15) + : : : : : +- * HashAggregate (14) + : : : : : +- * ColumnarToRow (13) + : : : : : +- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + : : : : +- BroadcastExchange (31) + : : : : +- * HashAggregate (30) + : : : : +- Exchange (29) + : : : : +- * HashAggregate (28) + : : : : +- * ColumnarToRow (27) + : : : : +- CometProject (26) + : : : : +- CometBroadcastHashJoin (25) + : : : : :- CometProject (23) + : : : : : +- CometBroadcastHashJoin (22) + : : : : : :- CometFilter (18) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (17) + : : : : : +- CometBroadcastExchange (21) + : : : : : +- CometFilter (20) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + : : : : +- ReusedExchange (24) + : : : +- BroadcastExchange (47) + : : : +- * HashAggregate (46) + : : : +- Exchange (45) + : : : +- * HashAggregate (44) + : : : +- * ColumnarToRow (43) + : : : +- CometProject (42) + : : : +- CometBroadcastHashJoin (41) + : : : :- CometProject (39) + : : : : +- CometBroadcastHashJoin (38) + : : : : :- CometFilter (34) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (33) + : : : : +- CometBroadcastExchange (37) + : : : : +- CometFilter (36) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (35) + : : : +- ReusedExchange (40) + : : +- BroadcastExchange (52) + : : +- * HashAggregate (51) + : : +- ReusedExchange (50) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_addr_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(5) CometBroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6], [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ca_address_sk#7, ca_county#8] + +(9) CometFilter +Input [2]: [ca_address_sk#7, ca_county#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_county#8)) + +(10) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ca_address_sk#7, ca_county#8] + +(11) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] +Right output [2]: [ca_address_sk#7, ca_county#8] +Arguments: [ss_addr_sk#1], [ca_address_sk#7], Inner, BuildRight + +(12) CometProject +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_address_sk#7, ca_county#8] +Arguments: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8], [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] + +(13) ColumnarToRow [codegen id : 1] +Input [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] + +(14) HashAggregate [codegen id : 1] +Input [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#9] +Results [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] + +(15) Exchange +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] +Arguments: hashpartitioning(ca_county#8, d_qoy#6, d_year#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(16) HashAggregate [codegen id : 12] +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#11] +Results [3]: [ca_county#8, d_year#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#11,17,2) AS store_sales#12] + +(17) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Arguments: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] + +(18) CometFilter +Input [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_addr_sk#13) + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: [d_date_sk#16, d_year#17, d_qoy#18] + +(20) CometFilter +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Condition : ((((isnotnull(d_qoy#18) AND isnotnull(d_year#17)) AND (d_qoy#18 = 2)) AND (d_year#17 = 2000)) AND isnotnull(d_date_sk#16)) + +(21) CometBroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: [d_date_sk#16, d_year#17, d_qoy#18] + +(22) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Right output [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: [ss_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + +(23) CometProject +Input [6]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_year#17, d_qoy#18] +Arguments: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18], [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18] + +(24) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#19, ca_county#20] + +(25) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18] +Right output [2]: [ca_address_sk#19, ca_county#20] +Arguments: [ss_addr_sk#13], [ca_address_sk#19], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_address_sk#19, ca_county#20] +Arguments: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20], [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] + +(27) ColumnarToRow [codegen id : 2] +Input [4]: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] + +(28) HashAggregate [codegen id : 2] +Input [4]: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] +Keys [3]: [ca_county#20, d_qoy#18, d_year#17] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] + +(29) Exchange +Input [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] +Arguments: hashpartitioning(ca_county#20, d_qoy#18, d_year#17, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(30) HashAggregate [codegen id : 3] +Input [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] +Keys [3]: [ca_county#20, d_qoy#18, d_year#17] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#14))#11] +Results [2]: [ca_county#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#14))#11,17,2) AS store_sales#23] + +(31) BroadcastExchange +Input [2]: [ca_county#20, store_sales#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(32) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ca_county#8] +Right keys [1]: [ca_county#20] +Join type: Inner +Join condition: None + +(33) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Arguments: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(34) CometFilter +Input [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_addr_sk#24) + +(35) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Arguments: [d_date_sk#27, d_year#28, d_qoy#29] + +(36) CometFilter +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Condition : ((((isnotnull(d_qoy#29) AND isnotnull(d_year#28)) AND (d_qoy#29 = 3)) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(37) CometBroadcastExchange +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Arguments: [d_date_sk#27, d_year#28, d_qoy#29] + +(38) CometBroadcastHashJoin +Left output [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Right output [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Arguments: [ss_sold_date_sk#26], [d_date_sk#27], Inner, BuildRight + +(39) CometProject +Input [6]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_year#28, d_qoy#29] +Arguments: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29], [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29] + +(40) ReusedExchange [Reuses operator id: 10] +Output [2]: [ca_address_sk#30, ca_county#31] + +(41) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29] +Right output [2]: [ca_address_sk#30, ca_county#31] +Arguments: [ss_addr_sk#24], [ca_address_sk#30], Inner, BuildRight + +(42) CometProject +Input [6]: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_address_sk#30, ca_county#31] +Arguments: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31], [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] + +(43) ColumnarToRow [codegen id : 4] +Input [4]: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] + +(44) HashAggregate [codegen id : 4] +Input [4]: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] +Keys [3]: [ca_county#31, d_qoy#29, d_year#28] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#25))] +Aggregate Attributes [1]: [sum#32] +Results [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] + +(45) Exchange +Input [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] +Arguments: hashpartitioning(ca_county#31, d_qoy#29, d_year#28, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(46) HashAggregate [codegen id : 5] +Input [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] +Keys [3]: [ca_county#31, d_qoy#29, d_year#28] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#25))#11] +Results [2]: [ca_county#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#25))#11,17,2) AS store_sales#34] + +(47) BroadcastExchange +Input [2]: [ca_county#31, store_sales#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] + +(48) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ca_county#20] +Right keys [1]: [ca_county#31] +Join type: Inner +Join condition: None + +(49) Project [codegen id : 12] +Output [5]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34] +Input [7]: [ca_county#8, d_year#5, store_sales#12, ca_county#20, store_sales#23, ca_county#31, store_sales#34] + +(50) ReusedExchange [Reuses operator id: 15] +Output [4]: [ca_county#35, d_qoy#36, d_year#37, sum#38] + +(51) HashAggregate [codegen id : 7] +Input [4]: [ca_county#35, d_qoy#36, d_year#37, sum#38] +Keys [3]: [ca_county#35, d_qoy#36, d_year#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#39))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#39))#40] +Results [2]: [ca_county#35, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#39))#40,17,2) AS web_sales#41] + +(52) BroadcastExchange +Input [2]: [ca_county#35, web_sales#41] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(53) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ca_county#8] +Right keys [1]: [ca_county#35] +Join type: Inner +Join condition: None + +(54) ReusedExchange [Reuses operator id: 31] +Output [2]: [ca_county#42, web_sales#43] + +(55) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ca_county#35] +Right keys [1]: [ca_county#42] +Join type: Inner +Join condition: (CASE WHEN (web_sales#41 > 0.00) THEN (web_sales#43 / web_sales#41) END > CASE WHEN (store_sales#12 > 0.00) THEN (store_sales#23 / store_sales#12) END) + +(56) Project [codegen id : 12] +Output [8]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#35, web_sales#41, web_sales#43] +Input [9]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#35, web_sales#41, ca_county#42, web_sales#43] + +(57) ReusedExchange [Reuses operator id: 47] +Output [2]: [ca_county#44, web_sales#45] + +(58) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ca_county#35] +Right keys [1]: [ca_county#44] +Join type: Inner +Join condition: (CASE WHEN (web_sales#43 > 0.00) THEN (web_sales#45 / web_sales#43) END > CASE WHEN (store_sales#23 > 0.00) THEN (store_sales#34 / store_sales#23) END) + +(59) Project [codegen id : 12] +Output [6]: [ca_county#8, d_year#5, (web_sales#43 / web_sales#41) AS web_q1_q2_increase#46, (store_sales#23 / store_sales#12) AS store_q1_q2_increase#47, (web_sales#45 / web_sales#43) AS web_q2_q3_increase#48, (store_sales#34 / store_sales#23) AS store_q2_q3_increase#49] +Input [10]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#35, web_sales#41, web_sales#43, ca_county#44, web_sales#45] + +(60) CometColumnarExchange +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#46, store_q1_q2_increase#47, web_q2_q3_increase#48, store_q2_q3_increase#49] +Arguments: rangepartitioning(ca_county#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] + +(61) CometSort +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#46, store_q1_q2_increase#47, web_q2_q3_increase#48, store_q2_q3_increase#49] +Arguments: [ca_county#8, d_year#5, web_q1_q2_increase#46, store_q1_q2_increase#47, web_q2_q3_increase#48, store_q2_q3_increase#49], [ca_county#8 ASC NULLS FIRST] + +(62) ColumnarToRow [codegen id : 13] +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#46, store_q1_q2_increase#47, web_q2_q3_increase#48, store_q2_q3_increase#49] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/simplified.txt new file mode 100644 index 0000000000..069817737f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q31.native_datafusion/simplified.txt @@ -0,0 +1,83 @@ +WholeStageCodegen (13) + ColumnarToRow + InputAdapter + CometSort [ca_county,d_year,web_q1_q2_increase,store_q1_q2_increase,web_q2_q3_increase,store_q2_q3_increase] + CometColumnarExchange [ca_county] #1 + WholeStageCodegen (12) + Project [ca_county,d_year,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [ca_county,d_year,store_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen (1) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #3 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [ca_address_sk,ca_county] #4 + CometFilter [ca_address_sk,ca_county] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #6 + WholeStageCodegen (2) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #7 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #9 + WholeStageCodegen (4) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,d_year,d_qoy,ca_county] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy,ca_address_sk,ca_county] + CometProject [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + CometBroadcastHashJoin [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_qoy] + CometFilter [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #10 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (7) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + ReusedExchange [ca_county,d_qoy,d_year,sum] #2 + InputAdapter + ReusedExchange [ca_county,web_sales] #5 + InputAdapter + ReusedExchange [ca_county,web_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/explain.txt new file mode 100644 index 0000000000..8514993cd1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +* HashAggregate (35) ++- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * ColumnarToRow (9) + : : +- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : +- BroadcastExchange (23) + : +- * Filter (22) + : +- * HashAggregate (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- * ColumnarToRow (18) + : +- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometFilter (11) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (10) + : +- CometBroadcastExchange (15) + : +- CometProject (14) + : +- CometFilter (13) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (12) + +- BroadcastExchange (30) + +- * ColumnarToRow (29) + +- CometProject (28) + +- CometFilter (27) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Arguments: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#1) AND isnotnull(cs_ext_discount_amt#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4, i_manufact_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(5) CometProject +Input [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4], [i_item_sk#4] + +(6) CometBroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: [i_item_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Right output [1]: [i_item_sk#4] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Arguments: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4], [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] + +(9) ColumnarToRow [codegen id : 4] +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] + +(10) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Arguments: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] + +(11) CometFilter +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Condition : isnotnull(cs_item_sk#6) + +(12) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(13) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(14) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(16) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(17) CometProject +Input [4]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8, d_date_sk#9] +Arguments: [cs_item_sk#6, cs_ext_discount_amt#7], [cs_item_sk#6, cs_ext_discount_amt#7] + +(18) ColumnarToRow [codegen id : 1] +Input [2]: [cs_item_sk#6, cs_ext_discount_amt#7] + +(19) HashAggregate [codegen id : 1] +Input [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Keys [1]: [cs_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#7))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [cs_item_sk#6, sum#13, count#14] + +(20) Exchange +Input [3]: [cs_item_sk#6, sum#13, count#14] +Arguments: hashpartitioning(cs_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(21) HashAggregate [codegen id : 2] +Input [3]: [cs_item_sk#6, sum#13, count#14] +Keys [1]: [cs_item_sk#6] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))#15] +Results [2]: [(1.3 * cast((avg(UnscaledValue(cs_ext_discount_amt#7))#15 / 100.0) as decimal(11,6))) AS (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] + +(22) Filter [codegen id : 2] +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#16) + +(23) BroadcastExchange +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=2] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [cs_item_sk#6] +Join type: Inner +Join condition: (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#16) + +(25) Project [codegen id : 4] +Output [2]: [cs_ext_discount_amt#2, cs_sold_date_sk#3] +Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4, (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] + +(26) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#17, d_date#18] +Arguments: [d_date_sk#17, d_date#18] + +(27) CometFilter +Input [2]: [d_date_sk#17, d_date#18] +Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 2000-01-27)) AND (d_date#18 <= 2000-04-26)) AND isnotnull(d_date_sk#17)) + +(28) CometProject +Input [2]: [d_date_sk#17, d_date#18] +Arguments: [d_date_sk#17], [d_date_sk#17] + +(29) ColumnarToRow [codegen id : 3] +Input [1]: [d_date_sk#17] + +(30) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(31) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#17] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 4] +Output [1]: [cs_ext_discount_amt#2] +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, d_date_sk#17] + +(33) HashAggregate [codegen id : 4] +Input [1]: [cs_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#19] +Results [1]: [sum#20] + +(34) Exchange +Input [1]: [sum#20] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(35) HashAggregate [codegen id : 5] +Input [1]: [sum#20] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))#21] +Results [1]: [MakeDecimal(sum(UnscaledValue(cs_ext_discount_amt#2))#21,17,2) AS excess discount amount#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/simplified.txt new file mode 100644 index 0000000000..79e7290c35 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q32.native_datafusion/simplified.txt @@ -0,0 +1,47 @@ +WholeStageCodegen (5) + HashAggregate [sum] [sum(UnscaledValue(cs_ext_discount_amt)),excess discount amount,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [cs_ext_discount_amt] [sum,sum] + Project [cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] + ColumnarToRow + InputAdapter + CometProject [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + CometBroadcastHashJoin [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + CometFilter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk] #2 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [(1.3 * avg(cs_ext_discount_amt))] + HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (1) + HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [cs_item_sk,cs_ext_discount_amt] + CometBroadcastHashJoin [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk,d_date_sk] + CometFilter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/explain.txt new file mode 100644 index 0000000000..f6b49a4d96 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/explain.txt @@ -0,0 +1,273 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * HashAggregate (49) + +- Exchange (48) + +- * HashAggregate (47) + +- Union (46) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * ColumnarToRow (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + :- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * ColumnarToRow (40) + : +- CometProject (39) + : +- CometBroadcastHashJoin (38) + : :- CometProject (36) + : : +- CometBroadcastHashJoin (35) + : : :- CometProject (33) + : : : +- CometBroadcastHashJoin (32) + : : : :- CometFilter (30) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (29) + : : : +- ReusedExchange (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- * HashAggregate (45) + +- ReusedExchange (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5, d_year#6, d_moy#7] + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8, ca_gmt_offset#9] + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [i_item_sk#10, i_manufact_id#11] + +(16) CometFilter +Input [2]: [i_item_sk#10, i_manufact_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_category#12, i_manufact_id#13] +Arguments: [i_category#12, i_manufact_id#13] + +(18) CometFilter +Input [2]: [i_category#12, i_manufact_id#13] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Electronics )) + +(19) CometProject +Input [2]: [i_category#12, i_manufact_id#13] +Arguments: [i_manufact_id#13], [i_manufact_id#13] + +(20) CometBroadcastExchange +Input [1]: [i_manufact_id#13] +Arguments: [i_manufact_id#13] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_manufact_id#11] +Right output [1]: [i_manufact_id#13] +Arguments: [i_manufact_id#11], [i_manufact_id#13], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [i_item_sk#10, i_manufact_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_manufact_id#11] +Arguments: [ss_ext_sales_price#3, i_manufact_id#11], [ss_ext_sales_price#3, i_manufact_id#11] + +(25) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] + +(26) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_manufact_id#11, sum#15] + +(27) Exchange +Input [2]: [i_manufact_id#11, sum#15] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(28) HashAggregate [codegen id : 2] +Input [2]: [i_manufact_id#11, sum#15] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_manufact_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(29) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Arguments: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(30) CometFilter +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(31) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#22] + +(32) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + +(33) CometProject +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] +Arguments: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20], [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] + +(34) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#23] + +(35) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Right output [1]: [ca_address_sk#23] +Arguments: [cs_bill_addr_sk#18], [ca_address_sk#23], Inner, BuildRight + +(36) CometProject +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] +Arguments: [cs_item_sk#19, cs_ext_sales_price#20], [cs_item_sk#19, cs_ext_sales_price#20] + +(37) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#24, i_manufact_id#25] + +(38) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Right output [2]: [i_item_sk#24, i_manufact_id#25] +Arguments: [cs_item_sk#19], [i_item_sk#24], Inner, BuildRight + +(39) CometProject +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_manufact_id#25] +Arguments: [cs_ext_sales_price#20, i_manufact_id#25], [cs_ext_sales_price#20, i_manufact_id#25] + +(40) ColumnarToRow [codegen id : 3] +Input [2]: [cs_ext_sales_price#20, i_manufact_id#25] + +(41) HashAggregate [codegen id : 3] +Input [2]: [cs_ext_sales_price#20, i_manufact_id#25] +Keys [1]: [i_manufact_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_manufact_id#25, sum#27] + +(42) Exchange +Input [2]: [i_manufact_id#25, sum#27] +Arguments: hashpartitioning(i_manufact_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(43) HashAggregate [codegen id : 4] +Input [2]: [i_manufact_id#25, sum#27] +Keys [1]: [i_manufact_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_manufact_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(44) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_manufact_id#30, sum#31] + +(45) HashAggregate [codegen id : 6] +Input [2]: [i_manufact_id#30, sum#31] +Keys [1]: [i_manufact_id#30] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#33] +Results [2]: [i_manufact_id#30, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#33,17,2) AS total_sales#34] + +(46) Union + +(47) HashAggregate [codegen id : 7] +Input [2]: [i_manufact_id#11, total_sales#17] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [3]: [i_manufact_id#11, sum#37, isEmpty#38] + +(48) Exchange +Input [3]: [i_manufact_id#11, sum#37, isEmpty#38] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(49) HashAggregate [codegen id : 8] +Input [3]: [i_manufact_id#11, sum#37, isEmpty#38] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#39] +Results [2]: [i_manufact_id#11, sum(total_sales#17)#39 AS total_sales#40] + +(50) TakeOrderedAndProject +Input [2]: [i_manufact_id#11, total_sales#40] +Arguments: 100, [total_sales#40 ASC NULLS FIRST], [i_manufact_id#11, total_sales#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/simplified.txt new file mode 100644 index 0000000000..363d49dbb0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q33.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [total_sales,i_manufact_id] + WholeStageCodegen (8) + HashAggregate [i_manufact_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (7) + HashAggregate [i_manufact_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (2) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen (1) + HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_manufact_id] #5 + CometBroadcastHashJoin [i_item_sk,i_manufact_id,i_manufact_id] + CometFilter [i_item_sk,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_manufact_id] + CometBroadcastExchange [i_manufact_id] #6 + CometProject [i_manufact_id] + CometFilter [i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_category,i_manufact_id] + WholeStageCodegen (4) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen (3) + HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_ext_sales_price,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_manufact_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_manufact_id] #5 + WholeStageCodegen (6) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + ReusedExchange [i_manufact_id,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/explain.txt new file mode 100644 index 0000000000..4d010531bc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6, d_year#7, d_dom#8] + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9, s_county#10] + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 15) AND (cnt#16 <= 20)) + +(25) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bf05de8c6e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q34.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/explain.txt new file mode 100644 index 0000000000..aae420dfb5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * Filter (22) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (21) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (27) + : +- * ColumnarToRow (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#6, ss_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8, d_year#9, d_qoy#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Arguments: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(22) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(23) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#15, ca_state#16] +Arguments: [ca_address_sk#15, ca_state#16] + +(25) CometFilter +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : isnotnull(ca_address_sk#15) + +(26) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#15, ca_state#16] + +(27) BroadcastExchange +Input [2]: [ca_address_sk#15, ca_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, ca_state#16] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#15, ca_state#16] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Arguments: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(31) CometFilter +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Condition : isnotnull(cd_demo_sk#17) + +(32) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(33) BroadcastExchange +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 5] +Output [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Input [8]: [c_current_cdemo_sk#4, ca_state#16, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(36) HashAggregate [codegen id : 5] +Input [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Keys [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#20), partial_max(cd_dep_count#20), partial_avg(cd_dep_count#20), partial_min(cd_dep_employed_count#21), partial_max(cd_dep_employed_count#21), partial_avg(cd_dep_employed_count#21), partial_min(cd_dep_college_count#22), partial_max(cd_dep_college_count#22), partial_avg(cd_dep_college_count#22)] +Aggregate Attributes [13]: [count#23, min#24, max#25, sum#26, count#27, min#28, max#29, sum#30, count#31, min#32, max#33, sum#34, count#35] +Results [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44, min#45, max#46, sum#47, count#48] + +(37) Exchange +Input [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44, min#45, max#46, sum#47, count#48] +Arguments: hashpartitioning(ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 6] +Input [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44, min#45, max#46, sum#47, count#48] +Keys [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Functions [10]: [count(1), min(cd_dep_count#20), max(cd_dep_count#20), avg(cd_dep_count#20), min(cd_dep_employed_count#21), max(cd_dep_employed_count#21), avg(cd_dep_employed_count#21), min(cd_dep_college_count#22), max(cd_dep_college_count#22), avg(cd_dep_college_count#22)] +Aggregate Attributes [10]: [count(1)#49, min(cd_dep_count#20)#50, max(cd_dep_count#20)#51, avg(cd_dep_count#20)#52, min(cd_dep_employed_count#21)#53, max(cd_dep_employed_count#21)#54, avg(cd_dep_employed_count#21)#55, min(cd_dep_college_count#22)#56, max(cd_dep_college_count#22)#57, avg(cd_dep_college_count#22)#58] +Results [18]: [ca_state#16, cd_gender#18, cd_marital_status#19, count(1)#49 AS cnt1#59, min(cd_dep_count#20)#50 AS min(cd_dep_count)#60, max(cd_dep_count#20)#51 AS max(cd_dep_count)#61, avg(cd_dep_count#20)#52 AS avg(cd_dep_count)#62, cd_dep_employed_count#21, count(1)#49 AS cnt2#63, min(cd_dep_employed_count#21)#53 AS min(cd_dep_employed_count)#64, max(cd_dep_employed_count#21)#54 AS max(cd_dep_employed_count)#65, avg(cd_dep_employed_count#21)#55 AS avg(cd_dep_employed_count)#66, cd_dep_college_count#22, count(1)#49 AS cnt3#67, min(cd_dep_college_count#22)#56 AS min(cd_dep_college_count)#68, max(cd_dep_college_count#22)#57 AS max(cd_dep_college_count)#69, avg(cd_dep_college_count#22)#58 AS avg(cd_dep_college_count)#70, cd_dep_count#20] + +(39) TakeOrderedAndProject +Input [18]: [ca_state#16, cd_gender#18, cd_marital_status#19, cnt1#59, min(cd_dep_count)#60, max(cd_dep_count)#61, avg(cd_dep_count)#62, cd_dep_employed_count#21, cnt2#63, min(cd_dep_employed_count)#64, max(cd_dep_employed_count)#65, avg(cd_dep_employed_count)#66, cd_dep_college_count#22, cnt3#67, min(cd_dep_college_count)#68, max(cd_dep_college_count)#69, avg(cd_dep_college_count)#70, cd_dep_count#20] +Arguments: 100, [ca_state#16 ASC NULLS FIRST, cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_dep_count#20 ASC NULLS FIRST, cd_dep_employed_count#21 ASC NULLS FIRST, cd_dep_college_count#22 ASC NULLS FIRST], [ca_state#16, cd_gender#18, cd_marital_status#19, cnt1#59, min(cd_dep_count)#60, max(cd_dep_count)#61, avg(cd_dep_count)#62, cd_dep_employed_count#21, cnt2#63, min(cd_dep_employed_count)#64, max(cd_dep_employed_count)#65, avg(cd_dep_employed_count)#66, cd_dep_college_count#22, cnt3#67, min(cd_dep_college_count)#68, max(cd_dep_college_count)#69, avg(cd_dep_college_count)#70] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6767df5cc1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q35.native_datafusion/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count)] + WholeStageCodegen (6) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] [count(1),min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,min,max,sum,count,min,max,sum,count,min,max,sum,count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/explain.txt new file mode 100644 index 0000000000..c61da07a87 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/explain.txt @@ -0,0 +1,156 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- Window (27) + +- * Sort (26) + +- Exchange (25) + +- * HashAggregate (24) + +- Exchange (23) + +- * HashAggregate (22) + +- * ColumnarToRow (21) + +- CometExpand (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6, d_year#7] + +(4) CometFilter +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4], [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(10) CometFilter +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(12) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Right output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_item_sk#1], [i_item_sk#8], Inner, BuildRight + +(13) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11, s_state#12] + +(15) CometFilter +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(16) CometProject +Input [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(18) CometBroadcastHashJoin +Left output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(19) CometProject +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] +Arguments: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] + +(20) CometExpand +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] + +(21) ColumnarToRow [codegen id : 1] +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] + +(22) HashAggregate [codegen id : 1] +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] + +(23) Exchange +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(24) HashAggregate [codegen id : 2] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] +Results [7]: [(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2)) AS gross_margin#22, i_category#13, i_class#14, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS lochierarchy#23, (MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2)) AS _w0#24, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS _w1#25, CASE WHEN (cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint) = 0) THEN i_category#13 END AS _w2#26] + +(25) Exchange +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: hashpartitioning(_w1#25, _w2#26, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(26) Sort [codegen id : 3] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: [_w1#25 ASC NULLS FIRST, _w2#26 ASC NULLS FIRST, _w0#24 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: [rank(_w0#24) windowspecdefinition(_w1#25, _w2#26, _w0#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#27], [_w1#25, _w2#26], [_w0#24 ASC NULLS FIRST] + +(28) Project [codegen id : 4] +Output [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Input [8]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26, rank_within_parent#27] + +(29) TakeOrderedAndProject +Input [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Arguments: 100, [lochierarchy#23 DESC NULLS LAST, CASE WHEN (lochierarchy#23 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#27 ASC NULLS FIRST], [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/simplified.txt new file mode 100644 index 0000000000..255fe0d657 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q36.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (4) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (3) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (2) + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,lochierarchy,_w0,_w1,_w2,sum,sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (1) + HashAggregate [i_category,i_class,spark_grouping_id,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExpand [i_category,i_class] [ss_ext_sales_price,ss_net_profit,i_category,i_class,spark_grouping_id] + CometProject [ss_ext_sales_price,ss_net_profit,i_category,i_class] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,i_item_sk,i_class,i_category] + CometProject [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_class,i_category] #4 + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] + CometBroadcastExchange [s_store_sk] #5 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/explain.txt new file mode 100644 index 0000000000..bc7fc8f71a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +* ColumnarToRow (26) ++- CometTakeOrderedAndProject (25) + +- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + +- CometProject (19) + +- CometFilter (18) + +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (17) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (i_current_price#4 <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(4) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(5) CometFilter +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(6) CometProject +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8], [inv_item_sk#6, inv_date_sk#8] + +(7) CometBroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1], [inv_item_sk#6], Inner, BuildRight + +(9) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(11) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-02-01)) AND (d_date#10 <= 2000-04-01)) AND isnotnull(d_date_sk#9)) + +(12) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(14) CometBroadcastHashJoin +Left output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [inv_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(15) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(16) CometBroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(17) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Arguments: [cs_item_sk#11, cs_sold_date_sk#12] + +(18) CometFilter +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) + +(19) CometProject +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Arguments: [cs_item_sk#11], [cs_item_sk#11] + +(20) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [1]: [cs_item_sk#11] +Arguments: [i_item_sk#1], [cs_item_sk#11], Inner, BuildLeft + +(21) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] +Arguments: [i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(22) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(23) CometExchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(25) CometTakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#2 ASC NULLS FIRST], output=[i_item_id#2,i_item_desc#3,i_current_price#4]), [i_item_id#2, i_item_desc#3, i_current_price#4], 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2a8cf3e62b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q37.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometExchange [i_item_id,i_item_desc,i_current_price] #1 + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometProject [i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,cs_item_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price] #2 + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk,d_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_item_sk,inv_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometBroadcastExchange [inv_item_sk,inv_date_sk] #3 + CometProject [inv_item_sk,inv_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/explain.txt new file mode 100644 index 0000000000..dab0b8c9ad --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/explain.txt @@ -0,0 +1,143 @@ +== Physical Plan == +* ColumnarToRow (27) ++- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometBroadcastHashJoin (20) + : :- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : +- CometBroadcastExchange (19) + : +- CometHashAggregate (18) + : +- ReusedExchange (17) + +- ReusedExchange (21) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Arguments: [ss_customer_sk#1, ss_sold_date_sk#2] + +(2) CometFilter +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4, d_month_seq#5] + +(4) CometFilter +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(5) CometProject +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4], [d_date_sk#3, d_date#4] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: [d_date_sk#3, d_date#4] + +(7) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Right output [2]: [d_date_sk#3, d_date#4] +Arguments: [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] +Arguments: [ss_customer_sk#1, d_date#4], [ss_customer_sk#1, d_date#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(10) CometFilter +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(11) CometBroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, d_date#4] +Right output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [ss_customer_sk#1], [c_customer_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_last_name#8, c_first_name#7, d_date#4], [c_last_name#8, c_first_name#7, d_date#4] + +(14) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(15) CometExchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(17) ReusedExchange [Reuses operator id: 15] +Output [3]: [c_last_name#9, c_first_name#10, d_date#11] + +(18) CometHashAggregate +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Keys [3]: [c_last_name#9, c_first_name#10, d_date#11] +Functions: [] + +(19) CometBroadcastExchange +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Arguments: [c_last_name#9, c_first_name#10, d_date#11] + +(20) CometBroadcastHashJoin +Left output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Right output [3]: [c_last_name#9, c_first_name#10, d_date#11] +Arguments: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)], [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#10, ), isnull(c_first_name#10), coalesce(d_date#11, 1970-01-01), isnull(d_date#11)], LeftSemi, BuildRight + +(21) ReusedExchange [Reuses operator id: 19] +Output [3]: [c_last_name#12, c_first_name#13, d_date#14] + +(22) CometBroadcastHashJoin +Left output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Right output [3]: [c_last_name#12, c_first_name#13, d_date#14] +Arguments: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)], [coalesce(c_last_name#12, ), isnull(c_last_name#12), coalesce(c_first_name#13, ), isnull(c_first_name#13), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)], LeftSemi, BuildRight + +(23) CometProject +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(24) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(25) CometExchange +Input [1]: [count#15] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(26) CometHashAggregate +Input [1]: [count#15] +Keys: [] +Functions [1]: [count(1)] + +(27) ColumnarToRow [codegen id : 1] +Input [1]: [count(1)#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c2697a2c1c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q38.native_datafusion/simplified.txt @@ -0,0 +1,29 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [count(1),count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #2 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ss_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ss_customer_sk,d_date] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_customer_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #3 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] + CometBroadcastExchange [c_last_name,c_first_name,d_date] #5 + CometHashAggregate [c_last_name,c_first_name,d_date] + ReusedExchange [c_last_name,c_first_name,d_date] #2 + ReusedExchange [c_last_name,c_first_name,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/explain.txt new file mode 100644 index 0000000000..3448dda988 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/explain.txt @@ -0,0 +1,263 @@ +== Physical Plan == +* ColumnarToRow (49) ++- CometSort (48) + +- CometColumnarExchange (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (24) + : +- * Filter (23) + : +- * HashAggregate (22) + : +- Exchange (21) + : +- * HashAggregate (20) + : +- * ColumnarToRow (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- BroadcastExchange (45) + +- * Project (44) + +- * Filter (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * ColumnarToRow (39) + +- CometProject (38) + +- CometBroadcastHashJoin (37) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (29) + : : +- CometBroadcastHashJoin (28) + : : :- CometFilter (26) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (25) + : : +- ReusedExchange (27) + : +- ReusedExchange (30) + +- CometBroadcastExchange (36) + +- CometProject (35) + +- CometFilter (34) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(4) CometFilter +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [i_item_sk#5] +Arguments: [inv_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5], [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(9) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(11) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#6], Inner, BuildRight + +(12) CometProject +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7], [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(14) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10], [d_date_sk#8, d_moy#10] + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10] + +(17) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Right output [2]: [d_date_sk#8, d_moy#10] +Arguments: [inv_date_sk#4], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] +Arguments: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10], [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(19) ColumnarToRow [codegen id : 1] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(20) HashAggregate [codegen id : 1] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] + +(21) Exchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(22) HashAggregate [codegen id : 4] +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] + +(23) Filter [codegen id : 4] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#24)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#23 / knownfloatingpointnormalized(normalizenanandzero(mean#24))))) > 1.0) END + +(24) Project [codegen id : 4] +Output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#24)) = 0.0) THEN null ELSE (stdev#23 / knownfloatingpointnormalized(normalizenanandzero(mean#24))) END AS cov#25] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] + +(25) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Arguments: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] + +(26) CometFilter +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) + +(27) ReusedExchange [Reuses operator id: 5] +Output [1]: [i_item_sk#30] + +(28) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Right output [1]: [i_item_sk#30] +Arguments: [inv_item_sk#26], [i_item_sk#30], Inner, BuildRight + +(29) CometProject +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Arguments: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30], [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] + +(30) ReusedExchange [Reuses operator id: 10] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(31) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Right output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Arguments: [inv_warehouse_sk#27], [w_warehouse_sk#31], Inner, BuildRight + +(32) CometProject +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Arguments: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32], [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(33) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Arguments: [d_date_sk#33, d_year#34, d_moy#35] + +(34) CometFilter +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(35) CometProject +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Arguments: [d_date_sk#33, d_moy#35], [d_date_sk#33, d_moy#35] + +(36) CometBroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: [d_date_sk#33, d_moy#35] + +(37) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Right output [2]: [d_date_sk#33, d_moy#35] +Arguments: [inv_date_sk#29], [d_date_sk#33], Inner, BuildRight + +(38) CometProject +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] +Arguments: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35], [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] + +(39) ColumnarToRow [codegen id : 2] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] + +(40) HashAggregate [codegen id : 2] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] + +(41) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(42) HashAggregate [codegen id : 3] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#46, avg(inv_quantity_on_hand#28)#22 AS mean#47] + +(43) Filter [codegen id : 3] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#46, mean#47] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#47)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#46 / knownfloatingpointnormalized(normalizenanandzero(mean#47))))) > 1.0) END + +(44) Project [codegen id : 3] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#47)) = 0.0) THEN null ELSE (stdev#46 / knownfloatingpointnormalized(normalizenanandzero(mean#47))) END AS cov#48] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#46, mean#47] + +(45) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=3] + +(46) BroadcastHashJoin [codegen id : 4] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#6] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(47) CometColumnarExchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#47 ASC NULLS FIRST, cov#48 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] + +(48) CometSort +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48], [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#47 ASC NULLS FIRST, cov#48 ASC NULLS FIRST] + +(49) ColumnarToRow [codegen id : 5] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d7520db711 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39a.native_datafusion/simplified.txt @@ -0,0 +1,60 @@ +WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometColumnarExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (1) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [i_item_sk] #3 + CometFilter [i_item_sk] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [d_date_sk,d_moy] #5 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (2) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + ReusedExchange [i_item_sk] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + CometBroadcastExchange [d_date_sk,d_moy] #8 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/explain.txt new file mode 100644 index 0000000000..d803964b9b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/explain.txt @@ -0,0 +1,263 @@ +== Physical Plan == +* ColumnarToRow (49) ++- CometSort (48) + +- CometColumnarExchange (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (24) + : +- * Filter (23) + : +- * HashAggregate (22) + : +- Exchange (21) + : +- * HashAggregate (20) + : +- * ColumnarToRow (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- BroadcastExchange (45) + +- * Project (44) + +- * Filter (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * ColumnarToRow (39) + +- CometProject (38) + +- CometBroadcastHashJoin (37) + :- CometProject (32) + : +- CometBroadcastHashJoin (31) + : :- CometProject (29) + : : +- CometBroadcastHashJoin (28) + : : :- CometFilter (26) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (25) + : : +- ReusedExchange (27) + : +- ReusedExchange (30) + +- CometBroadcastExchange (36) + +- CometProject (35) + +- CometFilter (34) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(4) CometFilter +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: [i_item_sk#5] + +(6) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [i_item_sk#5] +Arguments: [inv_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5], [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(9) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(11) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#6], Inner, BuildRight + +(12) CometProject +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7], [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(14) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10], [d_date_sk#8, d_moy#10] + +(16) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: [d_date_sk#8, d_moy#10] + +(17) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Right output [2]: [d_date_sk#8, d_moy#10] +Arguments: [inv_date_sk#4], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] +Arguments: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10], [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(19) ColumnarToRow [codegen id : 1] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] + +(20) HashAggregate [codegen id : 1] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] + +(21) Exchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(22) HashAggregate [codegen id : 4] +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] + +(23) Filter [codegen id : 4] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] +Condition : (CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#24)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#23 / knownfloatingpointnormalized(normalizenanandzero(mean#24))))) > 1.0) END AND CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#24)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#23 / knownfloatingpointnormalized(normalizenanandzero(mean#24))))) > 1.5) END) + +(24) Project [codegen id : 4] +Output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#24)) = 0.0) THEN null ELSE (stdev#23 / knownfloatingpointnormalized(normalizenanandzero(mean#24))) END AS cov#25] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] + +(25) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Arguments: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] + +(26) CometFilter +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) + +(27) ReusedExchange [Reuses operator id: 5] +Output [1]: [i_item_sk#30] + +(28) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Right output [1]: [i_item_sk#30] +Arguments: [inv_item_sk#26], [i_item_sk#30], Inner, BuildRight + +(29) CometProject +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Arguments: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30], [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] + +(30) ReusedExchange [Reuses operator id: 10] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(31) CometBroadcastHashJoin +Left output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Right output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Arguments: [inv_warehouse_sk#27], [w_warehouse_sk#31], Inner, BuildRight + +(32) CometProject +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Arguments: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32], [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(33) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Arguments: [d_date_sk#33, d_year#34, d_moy#35] + +(34) CometFilter +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(35) CometProject +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Arguments: [d_date_sk#33, d_moy#35], [d_date_sk#33, d_moy#35] + +(36) CometBroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: [d_date_sk#33, d_moy#35] + +(37) CometBroadcastHashJoin +Left output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Right output [2]: [d_date_sk#33, d_moy#35] +Arguments: [inv_date_sk#29], [d_date_sk#33], Inner, BuildRight + +(38) CometProject +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] +Arguments: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35], [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] + +(39) ColumnarToRow [codegen id : 2] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] + +(40) HashAggregate [codegen id : 2] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] + +(41) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(42) HashAggregate [codegen id : 3] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#46, avg(inv_quantity_on_hand#28)#22 AS mean#47] + +(43) Filter [codegen id : 3] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#46, mean#47] +Condition : CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#47)) = 0.0) THEN false ELSE (knownfloatingpointnormalized(normalizenanandzero((stdev#46 / knownfloatingpointnormalized(normalizenanandzero(mean#47))))) > 1.0) END + +(44) Project [codegen id : 3] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, CASE WHEN (knownfloatingpointnormalized(normalizenanandzero(mean#47)) = 0.0) THEN null ELSE (stdev#46 / knownfloatingpointnormalized(normalizenanandzero(mean#47))) END AS cov#48] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#46, mean#47] + +(45) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=3] + +(46) BroadcastHashJoin [codegen id : 4] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#6] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(47) CometColumnarExchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#47 ASC NULLS FIRST, cov#48 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=4] + +(48) CometSort +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] +Arguments: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48], [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#47 ASC NULLS FIRST, cov#48 ASC NULLS FIRST] + +(49) ColumnarToRow [codegen id : 5] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#47, cov#48] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d7520db711 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q39b.native_datafusion/simplified.txt @@ -0,0 +1,60 @@ +WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,w_warehouse_sk,i_item_sk,d_moy,mean,cov] + CometColumnarExchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (1) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [i_item_sk] #3 + CometFilter [i_item_sk] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [d_date_sk,d_moy] #5 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (2) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + CometBroadcastHashJoin [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name,d_date_sk,d_moy] + CometProject [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + ReusedExchange [i_item_sk] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + CometBroadcastExchange [d_date_sk,d_moy] #8 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/explain.txt new file mode 100644 index 0000000000..06788d9a9b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/explain.txt @@ -0,0 +1,283 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (47) + : +- * BroadcastHashJoin Inner BuildRight (46) + : :- * Project (44) + : : +- * BroadcastHashJoin Inner BuildRight (43) + : : :- * Project (39) + : : : +- * BroadcastHashJoin Inner BuildRight (38) + : : : :- * BroadcastHashJoin Inner BuildRight (33) + : : : : :- * Filter (17) + : : : : : +- * HashAggregate (16) + : : : : : +- Exchange (15) + : : : : : +- * HashAggregate (14) + : : : : : +- * ColumnarToRow (13) + : : : : : +- CometProject (12) + : : : : : +- CometBroadcastHashJoin (11) + : : : : : :- CometProject (7) + : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : : : +- CometBroadcastExchange (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : : +- CometBroadcastExchange (10) + : : : : : +- CometFilter (9) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : : : +- BroadcastExchange (32) + : : : : +- * HashAggregate (31) + : : : : +- Exchange (30) + : : : : +- * HashAggregate (29) + : : : : +- * ColumnarToRow (28) + : : : : +- CometProject (27) + : : : : +- CometBroadcastHashJoin (26) + : : : : :- CometProject (22) + : : : : : +- CometBroadcastHashJoin (21) + : : : : : :- CometFilter (19) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (18) + : : : : : +- ReusedExchange (20) + : : : : +- CometBroadcastExchange (25) + : : : : +- CometFilter (24) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (23) + : : : +- BroadcastExchange (37) + : : : +- * Filter (36) + : : : +- * HashAggregate (35) + : : : +- ReusedExchange (34) + : : +- BroadcastExchange (42) + : : +- * HashAggregate (41) + : : +- ReusedExchange (40) + : +- ReusedExchange (45) + +- ReusedExchange (48) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(4) CometFilter +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(9) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(11) CometBroadcastHashJoin +Left output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Right output [2]: [d_date_sk#15, d_year#16] +Arguments: [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(12) CometProject +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#15, d_year#16] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] + +(13) ColumnarToRow [codegen id : 1] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] + +(14) HashAggregate [codegen id : 1] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [partial_sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] +Aggregate Attributes [2]: [sum#17, isEmpty#18] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] + +(15) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(16) HashAggregate [codegen id : 12] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] +Aggregate Attributes [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))#21] +Results [2]: [c_customer_id#2 AS customer_id#22, sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))#21 AS year_total#23] + +(17) Filter [codegen id : 12] +Input [2]: [customer_id#22, year_total#23] +Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.000000)) + +(18) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Arguments: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] + +(19) CometFilter +Input [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_customer_id#25)) + +(20) ReusedExchange [Reuses operator id: 5] +Output [6]: [ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] + +(21) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Right output [6]: [ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] +Arguments: [c_customer_sk#24], [ss_customer_sk#32], Inner, BuildRight + +(22) CometProject +Input [14]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] +Arguments: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37], [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] + +(23) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#38, d_year#39] +Arguments: [d_date_sk#38, d_year#39] + +(24) CometFilter +Input [2]: [d_date_sk#38, d_year#39] +Condition : ((isnotnull(d_year#39) AND (d_year#39 = 2002)) AND isnotnull(d_date_sk#38)) + +(25) CometBroadcastExchange +Input [2]: [d_date_sk#38, d_year#39] +Arguments: [d_date_sk#38, d_year#39] + +(26) CometBroadcastHashJoin +Left output [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] +Right output [2]: [d_date_sk#38, d_year#39] +Arguments: [ss_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight + +(27) CometProject +Input [14]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37, d_date_sk#38, d_year#39] +Arguments: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39], [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] + +(28) ColumnarToRow [codegen id : 2] +Input [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] + +(29) HashAggregate [codegen id : 2] +Input [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] +Keys [8]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39] +Functions [1]: [partial_sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] + +(30) Exchange +Input [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] +Arguments: hashpartitioning(c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(31) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] +Keys [8]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39] +Functions [1]: [sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))] +Aggregate Attributes [1]: [sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))#21] +Results [8]: [c_customer_id#25 AS customer_id#44, c_first_name#26 AS customer_first_name#45, c_last_name#27 AS customer_last_name#46, c_preferred_cust_flag#28 AS customer_preferred_cust_flag#47, c_birth_country#29 AS customer_birth_country#48, c_login#30 AS customer_login#49, c_email_address#31 AS customer_email_address#50, sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))#21 AS year_total#51] + +(32) BroadcastExchange +Input [8]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(33) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#44] +Join type: Inner +Join condition: None + +(34) ReusedExchange [Reuses operator id: 15] +Output [10]: [c_customer_id#52, c_first_name#53, c_last_name#54, c_preferred_cust_flag#55, c_birth_country#56, c_login#57, c_email_address#58, d_year#59, sum#60, isEmpty#61] + +(35) HashAggregate [codegen id : 5] +Input [10]: [c_customer_id#52, c_first_name#53, c_last_name#54, c_preferred_cust_flag#55, c_birth_country#56, c_login#57, c_email_address#58, d_year#59, sum#60, isEmpty#61] +Keys [8]: [c_customer_id#52, c_first_name#53, c_last_name#54, c_preferred_cust_flag#55, c_birth_country#56, c_login#57, c_email_address#58, d_year#59] +Functions [1]: [sum(((((cs_ext_list_price#62 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#64) + cs_ext_sales_price#65) / 2))] +Aggregate Attributes [1]: [sum(((((cs_ext_list_price#62 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#64) + cs_ext_sales_price#65) / 2))#66] +Results [2]: [c_customer_id#52 AS customer_id#67, sum(((((cs_ext_list_price#62 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#64) + cs_ext_sales_price#65) / 2))#66 AS year_total#68] + +(36) Filter [codegen id : 5] +Input [2]: [customer_id#67, year_total#68] +Condition : (isnotnull(year_total#68) AND (year_total#68 > 0.000000)) + +(37) BroadcastExchange +Input [2]: [customer_id#67, year_total#68] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] + +(38) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#67] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 12] +Output [11]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, year_total#68] +Input [12]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, customer_id#67, year_total#68] + +(40) ReusedExchange [Reuses operator id: 30] +Output [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#76, sum#77, isEmpty#78] + +(41) HashAggregate [codegen id : 7] +Input [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#76, sum#77, isEmpty#78] +Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#76] +Functions [1]: [sum(((((cs_ext_list_price#79 - cs_ext_wholesale_cost#80) - cs_ext_discount_amt#81) + cs_ext_sales_price#82) / 2))] +Aggregate Attributes [1]: [sum(((((cs_ext_list_price#79 - cs_ext_wholesale_cost#80) - cs_ext_discount_amt#81) + cs_ext_sales_price#82) / 2))#66] +Results [2]: [c_customer_id#69 AS customer_id#83, sum(((((cs_ext_list_price#79 - cs_ext_wholesale_cost#80) - cs_ext_discount_amt#81) + cs_ext_sales_price#82) / 2))#66 AS year_total#84] + +(42) BroadcastExchange +Input [2]: [customer_id#83, year_total#84] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] + +(43) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#83] +Join type: Inner +Join condition: (CASE WHEN (year_total#68 > 0.000000) THEN (year_total#84 / year_total#68) END > CASE WHEN (year_total#23 > 0.000000) THEN (year_total#51 / year_total#23) END) + +(44) Project [codegen id : 12] +Output [10]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#68, year_total#84] +Input [13]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, year_total#68, customer_id#83, year_total#84] + +(45) ReusedExchange [Reuses operator id: 37] +Output [2]: [customer_id#85, year_total#86] + +(46) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#85] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 12] +Output [11]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#68, year_total#84, year_total#86] +Input [12]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#68, year_total#84, customer_id#85, year_total#86] + +(48) ReusedExchange [Reuses operator id: 42] +Output [2]: [customer_id#87, year_total#88] + +(49) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#87] +Join type: Inner +Join condition: (CASE WHEN (year_total#68 > 0.000000) THEN (year_total#84 / year_total#68) END > CASE WHEN (year_total#86 > 0.000000) THEN (year_total#88 / year_total#86) END) + +(50) Project [codegen id : 12] +Output [7]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] +Input [13]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#68, year_total#84, year_total#86, customer_id#87, year_total#88] + +(51) TakeOrderedAndProject +Input [7]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] +Arguments: 100, [customer_id#44 ASC NULLS FIRST, customer_first_name#45 ASC NULLS FIRST, customer_last_name#46 ASC NULLS FIRST, customer_preferred_cust_flag#47 ASC NULLS FIRST, customer_birth_country#48 ASC NULLS FIRST, customer_login#49 ASC NULLS FIRST, customer_email_address#50 ASC NULLS FIRST], [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/simplified.txt new file mode 100644 index 0000000000..12ceb852df --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q4.native_datafusion/simplified.txt @@ -0,0 +1,68 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + WholeStageCodegen (12) + Project [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 + WholeStageCodegen (1) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2)),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 + WholeStageCodegen (2) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] #1 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] #5 + InputAdapter + ReusedExchange [customer_id,year_total] #7 + InputAdapter + ReusedExchange [customer_id,year_total] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/explain.txt new file mode 100644 index 0000000000..1cad180942 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/explain.txt @@ -0,0 +1,172 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * ColumnarToRow (28) + +- CometProject (27) + +- CometBroadcastHashJoin (26) + :- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (16) + : : +- CometBroadcastHashJoin (15) + : : :- CometProject (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (5) + : : +- CometBroadcastExchange (14) + : : +- CometFilter (13) + : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (12) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + +- CometBroadcastExchange (25) + +- CometFilter (24) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (23) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) + +(3) CometExchange +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5], [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] + +(6) CometFilter +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(7) CometProject +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8], [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(8) CometExchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: hashpartitioning(cr_order_number#7, cr_item_sk#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8], [cr_order_number#7 ASC NULLS FIRST, cr_item_sk#6 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Right output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cs_order_number#3, cs_item_sk#2], [cr_order_number#7, cr_item_sk#6], LeftOuter + +(11) CometProject +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8], [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] + +(12) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [w_warehouse_sk#10, w_state#11] + +(13) CometFilter +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(14) CometBroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [w_warehouse_sk#10, w_state#11] + +(15) CometBroadcastHashJoin +Left output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] +Right output [2]: [w_warehouse_sk#10, w_state#11] +Arguments: [cs_warehouse_sk#1], [w_warehouse_sk#10], Inner, BuildRight + +(16) CometProject +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] +Arguments: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11], [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Arguments: [i_item_sk#12, i_item_id#13, i_current_price#14] + +(18) CometFilter +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Condition : (((isnotnull(i_current_price#14) AND (i_current_price#14 >= 0.99)) AND (i_current_price#14 <= 1.49)) AND isnotnull(i_item_sk#12)) + +(19) CometProject +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Arguments: [i_item_sk#12, i_item_id#13], [i_item_sk#12, i_item_id#13] + +(20) CometBroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: [i_item_sk#12, i_item_id#13] + +(21) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] +Right output [2]: [i_item_sk#12, i_item_id#13] +Arguments: [cs_item_sk#2], [i_item_sk#12], Inner, BuildRight + +(22) CometProject +Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_sk#12, i_item_id#13] +Arguments: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13], [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] + +(23) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_date#16] +Arguments: [d_date_sk#15, d_date#16] + +(24) CometFilter +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-02-10)) AND (d_date#16 <= 2000-04-10)) AND isnotnull(d_date_sk#15)) + +(25) CometBroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: [d_date_sk#15, d_date#16] + +(26) CometBroadcastHashJoin +Left output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] +Right output [2]: [d_date_sk#15, d_date#16] +Arguments: [cs_sold_date_sk#5], [d_date_sk#15], Inner, BuildRight + +(27) CometProject +Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date_sk#15, d_date#16] +Arguments: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16], [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] + +(28) ColumnarToRow [codegen id : 1] +Input [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] + +(29) HashAggregate [codegen id : 1] +Input [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [partial_sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] + +(30) Exchange +Input [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] +Arguments: hashpartitioning(w_state#11, i_item_id#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(31) HashAggregate [codegen id : 2] +Input [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#25, sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#26] +Results [4]: [w_state#11, i_item_id#13, sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#25 AS sales_before#27, sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#26 AS sales_after#28] + +(32) TakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#13, sales_before#27, sales_after#28] +Arguments: 100, [w_state#11 ASC NULLS FIRST, i_item_id#13 ASC NULLS FIRST], [w_state#11, i_item_id#13, sales_before#27, sales_after#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3d73ff4008 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q40.native_datafusion/simplified.txt @@ -0,0 +1,36 @@ +TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + WholeStageCodegen (2) + HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_state,i_item_id] #1 + WholeStageCodegen (1) + HashAggregate [w_state,i_item_id,d_date,cs_sales_price,cr_refunded_cash] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] + CometBroadcastHashJoin [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id,d_date_sk,d_date] + CometProject [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state] + CometBroadcastHashJoin [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_warehouse_sk,w_state] + CometProject [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash] + CometSortMergeJoin [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_refunded_cash] + CometSort [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #2 + CometFilter [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash] + CometExchange [cr_order_number,cr_item_sk] #3 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_state] #4 + CometFilter [w_warehouse_sk,w_state] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometProject [i_item_sk,i_item_id] + CometFilter [i_item_sk,i_item_id,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_current_price] + CometBroadcastExchange [d_date_sk,d_date] #6 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/explain.txt new file mode 100644 index 0000000000..e1a62cf025 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/explain.txt @@ -0,0 +1,102 @@ +== Physical Plan == +* ColumnarToRow (19) ++- CometTakeOrderedAndProject (18) + +- CometHashAggregate (17) + +- CometExchange (16) + +- CometHashAggregate (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (3) + : +- CometFilter (2) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometHashAggregate (9) + +- CometExchange (8) + +- CometHashAggregate (7) + +- CometProject (6) + +- CometFilter (5) + +- CometNativeScan: `spark_catalog`.`default`.`item` (4) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Arguments: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(2) CometFilter +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(3) CometProject +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Arguments: [i_manufact#2, i_product_name#3], [i_manufact#2, i_product_name#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Arguments: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] + +(5) CometFilter +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Condition : (((((i_category#4 = Women ) AND (((((i_color#7 = powder ) OR (i_color#7 = khaki )) AND ((i_units#8 = Ounce ) OR (i_units#8 = Oz ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = brown ) OR (i_color#7 = honeydew )) AND ((i_units#8 = Bunch ) OR (i_units#8 = Ton ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = floral ) OR (i_color#7 = deep )) AND ((i_units#8 = N/A ) OR (i_units#8 = Dozen ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = light ) OR (i_color#7 = cornflower )) AND ((i_units#8 = Box ) OR (i_units#8 = Pound ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large )))))) OR (((i_category#4 = Women ) AND (((((i_color#7 = midnight ) OR (i_color#7 = snow )) AND ((i_units#8 = Pallet ) OR (i_units#8 = Gross ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = cyan ) OR (i_color#7 = papaya )) AND ((i_units#8 = Cup ) OR (i_units#8 = Dram ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = orange ) OR (i_color#7 = frosted )) AND ((i_units#8 = Each ) OR (i_units#8 = Tbl ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = forest ) OR (i_color#7 = ghost )) AND ((i_units#8 = Lb ) OR (i_units#8 = Bundle ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))))))) AND isnotnull(i_manufact#5)) + +(6) CometProject +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Arguments: [i_manufact#5], [i_manufact#5] + +(7) CometHashAggregate +Input [1]: [i_manufact#5] +Keys [1]: [i_manufact#5] +Functions [1]: [partial_count(1)] + +(8) CometExchange +Input [2]: [i_manufact#5, count#9] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(9) CometHashAggregate +Input [2]: [i_manufact#5, count#9] +Keys [1]: [i_manufact#5] +Functions [1]: [count(1)] + +(10) CometFilter +Input [2]: [item_cnt#10, i_manufact#5] +Condition : (item_cnt#10 > 0) + +(11) CometProject +Input [2]: [item_cnt#10, i_manufact#5] +Arguments: [i_manufact#5], [i_manufact#5] + +(12) CometBroadcastExchange +Input [1]: [i_manufact#5] +Arguments: [i_manufact#5] + +(13) CometBroadcastHashJoin +Left output [2]: [i_manufact#2, i_product_name#3] +Right output [1]: [i_manufact#5] +Arguments: [i_manufact#2], [i_manufact#5], Inner, BuildRight + +(14) CometProject +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#5] +Arguments: [i_product_name#3], [i_product_name#3] + +(15) CometHashAggregate +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] + +(16) CometExchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(17) CometHashAggregate +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] + +(18) CometTakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_product_name#3 ASC NULLS FIRST], output=[i_product_name#3]), [i_product_name#3], 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + +(19) ColumnarToRow [codegen id : 1] +Input [1]: [i_product_name#3] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ea03f38e2c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q41.native_datafusion/simplified.txt @@ -0,0 +1,21 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name] + CometHashAggregate [i_product_name] + CometExchange [i_product_name] #1 + CometHashAggregate [i_product_name] + CometProject [i_product_name] + CometBroadcastHashJoin [i_manufact,i_product_name,i_manufact] + CometProject [i_manufact,i_product_name] + CometFilter [i_manufact_id,i_manufact,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_manufact_id,i_manufact,i_product_name] + CometBroadcastExchange [i_manufact] #2 + CometProject [i_manufact] + CometFilter [item_cnt,i_manufact] + CometHashAggregate [item_cnt,i_manufact,count,count(1)] + CometExchange [i_manufact] #3 + CometHashAggregate [i_manufact,count] + CometProject [i_manufact] + CometFilter [i_category,i_manufact,i_size,i_color,i_units] + CometNativeScan: `spark_catalog`.`default`.`item` [i_category,i_manufact,i_size,i_color,i_units] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/explain.txt new file mode 100644 index 0000000000..63c16c9fbf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/explain.txt @@ -0,0 +1,105 @@ +== Physical Plan == +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * ColumnarToRow (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9], [i_item_sk#7, i_category_id#8, i_category#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [i_item_sk#7, i_category_id#8, i_category#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9], [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] + +(15) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] + +(16) HashAggregate [codegen id : 1] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] + +(17) Exchange +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(18) HashAggregate [codegen id : 2] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_category_id#8, i_category#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum(ss_ext_sales_price)#14] + +(19) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] +Arguments: 100, [sum(ss_ext_sales_price)#14 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/simplified.txt new file mode 100644 index 0000000000..001869f2a7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q42.native_datafusion/simplified.txt @@ -0,0 +1,23 @@ +TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] + WholeStageCodegen (2) + HashAggregate [d_year,i_category_id,i_category,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + InputAdapter + Exchange [d_year,i_category_id,i_category] #1 + WholeStageCodegen (1) + HashAggregate [d_year,i_category_id,i_category,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [d_year,ss_ext_sales_price,i_category_id,i_category] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_category_id,i_category] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_category_id,i_category] #3 + CometProject [i_item_sk,i_category_id,i_category] + CometFilter [i_item_sk,i_category_id,i_category,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/explain.txt new file mode 100644 index 0000000000..1c898bf879 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/explain.txt @@ -0,0 +1,105 @@ +== Physical Plan == +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * ColumnarToRow (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Arguments: [d_date_sk#1, d_year#2, d_day_name#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Arguments: [d_date_sk#1, d_day_name#3], [d_date_sk#1, d_day_name#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_day_name#3] +Right output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_day_name#3, ss_store_sk#4, ss_sales_price#5], [d_day_name#3, ss_store_sk#4, ss_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] + +(10) CometFilter +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) + +(11) CometProject +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9], [s_store_sk#7, s_store_id#8, s_store_name#9] + +(12) CometBroadcastExchange +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [s_store_sk#7, s_store_id#8, s_store_name#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Right output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [ss_store_sk#4], [s_store_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9], [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] + +(15) ColumnarToRow [codegen id : 1] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] + +(16) HashAggregate [codegen id : 1] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Results [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] + +(17) Exchange +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(18) HashAggregate [codegen id : 2] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31] +Results [9]: [s_store_name#9, s_store_id#8, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25,17,2) AS sun_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26,17,2) AS mon_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27,17,2) AS tue_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28,17,2) AS wed_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29,17,2) AS thu_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30,17,2) AS fri_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31,17,2) AS sat_sales#38] + +(19) TakeOrderedAndProject +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#32 ASC NULLS FIRST, mon_sales#33 ASC NULLS FIRST, tue_sales#34 ASC NULLS FIRST, wed_sales#35 ASC NULLS FIRST, thu_sales#36 ASC NULLS FIRST, fri_sales#37 ASC NULLS FIRST, sat_sales#38 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/simplified.txt new file mode 100644 index 0000000000..151a17bda1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q43.native_datafusion/simplified.txt @@ -0,0 +1,23 @@ +TakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + WholeStageCodegen (2) + HashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_name,s_store_id] #1 + WholeStageCodegen (1) + HashAggregate [s_store_name,s_store_id,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [d_day_name,ss_sales_price,s_store_id,s_store_name] + CometBroadcastHashJoin [d_day_name,ss_store_sk,ss_sales_price,s_store_sk,s_store_id,s_store_name] + CometProject [d_day_name,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [d_date_sk,d_day_name,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_day_name] + CometFilter [d_date_sk,d_year,d_day_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_day_name] + CometBroadcastExchange [ss_store_sk,ss_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_store_id,s_store_name] #3 + CometProject [s_store_sk,s_store_id,s_store_name] + CometFilter [s_store_sk,s_store_id,s_store_name,s_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/explain.txt new file mode 100644 index 0000000000..8c3051bc5d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/explain.txt @@ -0,0 +1,277 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (33) + : : +- * SortMergeJoin Inner (32) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * Filter (15) + : : : +- Window (14) + : : : +- WindowGroupLimit (13) + : : : +- * Sort (12) + : : : +- Exchange (11) + : : : +- WindowGroupLimit (10) + : : : +- * Sort (9) + : : : +- * Filter (8) + : : : +- * HashAggregate (7) + : : : +- Exchange (6) + : : : +- * HashAggregate (5) + : : : +- * ColumnarToRow (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- * Sort (31) + : : +- Exchange (30) + : : +- * Project (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- WindowGroupLimit (26) + : : +- * Sort (25) + : : +- Exchange (24) + : : +- WindowGroupLimit (23) + : : +- * Sort (22) + : : +- * Filter (21) + : : +- * HashAggregate (20) + : : +- ReusedExchange (19) + : +- BroadcastExchange (37) + : +- * ColumnarToRow (36) + : +- CometFilter (35) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (34) + +- ReusedExchange (40) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(3) CometProject +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_net_profit#3], [ss_item_sk#1, ss_net_profit#3] + +(4) ColumnarToRow [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] + +(5) HashAggregate [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#5, count#6] +Results [3]: [ss_item_sk#1, sum#7, count#8] + +(6) Exchange +Input [3]: [ss_item_sk#1, sum#7, count#8] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(7) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#1, sum#7, count#8] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [2]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11] + +(8) Filter [codegen id : 2] +Input [2]: [item_sk#10, rank_col#11] +Condition : (isnotnull(rank_col#11) AND (cast(rank_col#11 as decimal(13,7)) > (0.9 * Subquery scalar-subquery#12, [id=#13]))) + +(9) Sort [codegen id : 2] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(10) WindowGroupLimit +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], rank(rank_col#11), 10, Partial + +(11) Exchange +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 3] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(13) WindowGroupLimit +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], rank(rank_col#11), 10, Final + +(14) Window +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#14], [rank_col#11 ASC NULLS FIRST] + +(15) Filter [codegen id : 4] +Input [3]: [item_sk#10, rank_col#11, rnk#14] +Condition : ((rnk#14 < 11) AND isnotnull(item_sk#10)) + +(16) Project [codegen id : 4] +Output [2]: [item_sk#10, rnk#14] +Input [3]: [item_sk#10, rank_col#11, rnk#14] + +(17) Exchange +Input [2]: [item_sk#10, rnk#14] +Arguments: hashpartitioning(rnk#14, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 5] +Input [2]: [item_sk#10, rnk#14] +Arguments: [rnk#14 ASC NULLS FIRST], false, 0 + +(19) ReusedExchange [Reuses operator id: 6] +Output [3]: [ss_item_sk#15, sum#16, count#17] + +(20) HashAggregate [codegen id : 7] +Input [3]: [ss_item_sk#15, sum#16, count#17] +Keys [1]: [ss_item_sk#15] +Functions [1]: [avg(UnscaledValue(ss_net_profit#18))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#18))#19] +Results [2]: [ss_item_sk#15 AS item_sk#20, cast((avg(UnscaledValue(ss_net_profit#18))#19 / 100.0) as decimal(11,6)) AS rank_col#21] + +(21) Filter [codegen id : 7] +Input [2]: [item_sk#20, rank_col#21] +Condition : (isnotnull(rank_col#21) AND (cast(rank_col#21 as decimal(13,7)) > (0.9 * ReusedSubquery Subquery scalar-subquery#12, [id=#13]))) + +(22) Sort [codegen id : 7] +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], false, 0 + +(23) WindowGroupLimit +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], rank(rank_col#21), 10, Partial + +(24) Exchange +Input [2]: [item_sk#20, rank_col#21] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(25) Sort [codegen id : 8] +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], false, 0 + +(26) WindowGroupLimit +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], rank(rank_col#21), 10, Final + +(27) Window +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank(rank_col#21) windowspecdefinition(rank_col#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#22], [rank_col#21 DESC NULLS LAST] + +(28) Filter [codegen id : 9] +Input [3]: [item_sk#20, rank_col#21, rnk#22] +Condition : ((rnk#22 < 11) AND isnotnull(item_sk#20)) + +(29) Project [codegen id : 9] +Output [2]: [item_sk#20, rnk#22] +Input [3]: [item_sk#20, rank_col#21, rnk#22] + +(30) Exchange +Input [2]: [item_sk#20, rnk#22] +Arguments: hashpartitioning(rnk#22, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) Sort [codegen id : 10] +Input [2]: [item_sk#20, rnk#22] +Arguments: [rnk#22 ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin [codegen id : 13] +Left keys [1]: [rnk#14] +Right keys [1]: [rnk#22] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 13] +Output [3]: [item_sk#10, rnk#14, item_sk#20] +Input [4]: [item_sk#10, rnk#14, item_sk#20, rnk#22] + +(34) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#23, i_product_name#24] +Arguments: [i_item_sk#23, i_product_name#24] + +(35) CometFilter +Input [2]: [i_item_sk#23, i_product_name#24] +Condition : isnotnull(i_item_sk#23) + +(36) ColumnarToRow [codegen id : 11] +Input [2]: [i_item_sk#23, i_product_name#24] + +(37) BroadcastExchange +Input [2]: [i_item_sk#23, i_product_name#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#23] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 13] +Output [3]: [rnk#14, item_sk#20, i_product_name#24] +Input [5]: [item_sk#10, rnk#14, item_sk#20, i_item_sk#23, i_product_name#24] + +(40) ReusedExchange [Reuses operator id: 37] +Output [2]: [i_item_sk#25, i_product_name#26] + +(41) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [item_sk#20] +Right keys [1]: [i_item_sk#25] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 13] +Output [3]: [rnk#14, i_product_name#24 AS best_performing#27, i_product_name#26 AS worst_performing#28] +Input [5]: [rnk#14, item_sk#20, i_product_name#24, i_item_sk#25, i_product_name#26] + +(43) TakeOrderedAndProject +Input [3]: [rnk#14, best_performing#27, worst_performing#28] +Arguments: 100, [rnk#14 ASC NULLS FIRST], [rnk#14, best_performing#27, worst_performing#28] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#12, [id=#13] +* HashAggregate (50) ++- Exchange (49) + +- * HashAggregate (48) + +- * ColumnarToRow (47) + +- CometProject (46) + +- CometFilter (45) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (44) + + +(44) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_addr_sk#29, ss_store_sk#30, ss_net_profit#31, ss_sold_date_sk#32] +Arguments: [ss_addr_sk#29, ss_store_sk#30, ss_net_profit#31, ss_sold_date_sk#32] + +(45) CometFilter +Input [4]: [ss_addr_sk#29, ss_store_sk#30, ss_net_profit#31, ss_sold_date_sk#32] +Condition : ((isnotnull(ss_store_sk#30) AND (ss_store_sk#30 = 4)) AND isnull(ss_addr_sk#29)) + +(46) CometProject +Input [4]: [ss_addr_sk#29, ss_store_sk#30, ss_net_profit#31, ss_sold_date_sk#32] +Arguments: [ss_store_sk#30, ss_net_profit#31], [ss_store_sk#30, ss_net_profit#31] + +(47) ColumnarToRow [codegen id : 1] +Input [2]: [ss_store_sk#30, ss_net_profit#31] + +(48) HashAggregate [codegen id : 1] +Input [2]: [ss_store_sk#30, ss_net_profit#31] +Keys [1]: [ss_store_sk#30] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#31))] +Aggregate Attributes [2]: [sum#33, count#34] +Results [3]: [ss_store_sk#30, sum#35, count#36] + +(49) Exchange +Input [3]: [ss_store_sk#30, sum#35, count#36] +Arguments: hashpartitioning(ss_store_sk#30, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(50) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#30, sum#35, count#36] +Keys [1]: [ss_store_sk#30] +Functions [1]: [avg(UnscaledValue(ss_net_profit#31))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#31))#37] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#31))#37 / 100.0) as decimal(11,6)) AS rank_col#38] + +Subquery:2 Hosting operator id = 21 Hosting Expression = ReusedSubquery Subquery scalar-subquery#12, [id=#13] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/simplified.txt new file mode 100644 index 0000000000..de76347fe1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q44.native_datafusion/simplified.txt @@ -0,0 +1,81 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen (13) + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + SortMergeJoin [rnk,rnk] + InputAdapter + WholeStageCodegen (5) + Sort [rnk] + InputAdapter + Exchange [rnk] #1 + WholeStageCodegen (4) + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WindowGroupLimit [rank_col] + WholeStageCodegen (3) + Sort [rank_col] + InputAdapter + Exchange #2 + WindowGroupLimit [rank_col] + WholeStageCodegen (2) + Sort [rank_col] + Filter [rank_col] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [ss_store_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #4 + WholeStageCodegen (1) + HashAggregate [ss_store_sk,ss_net_profit] [sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_net_profit] + CometFilter [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,sum,count] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (1) + HashAggregate [ss_item_sk,ss_net_profit] [sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_item_sk,ss_net_profit] + CometFilter [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (10) + Sort [rnk] + InputAdapter + Exchange [rnk] #5 + WholeStageCodegen (9) + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WindowGroupLimit [rank_col] + WholeStageCodegen (8) + Sort [rank_col] + InputAdapter + Exchange #6 + WindowGroupLimit [rank_col] + WholeStageCodegen (7) + Sort [rank_col] + Filter [rank_col] + ReusedSubquery [rank_col] #1 + HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,sum,count] + InputAdapter + ReusedExchange [ss_item_sk,sum,count] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + ColumnarToRow + InputAdapter + CometFilter [i_item_sk,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/explain.txt new file mode 100644 index 0000000000..acee604c97 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (30) + :- * ColumnarToRow (24) + : +- CometProject (23) + : +- CometBroadcastHashJoin (22) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (8) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : +- CometBroadcastExchange (21) + : +- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + +- BroadcastExchange (29) + +- * ColumnarToRow (28) + +- CometProject (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`item` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Arguments: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] + +(2) CometFilter +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Condition : (isnotnull(ws_bill_customer_sk#3) AND isnotnull(ws_item_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [c_customer_sk#6, c_current_addr_sk#7] + +(4) CometFilter +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [c_customer_sk#6, c_current_addr_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Right output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [ws_bill_customer_sk#3], [c_customer_sk#6], Inner, BuildRight + +(7) CometProject +Input [6]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5, c_customer_sk#6, c_current_addr_sk#7] +Arguments: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7], [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(9) CometFilter +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Condition : isnotnull(ca_address_sk#8) + +(10) CometBroadcastExchange +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(11) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] +Right output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7, ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10], [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Arguments: [d_date_sk#11, d_year#12, d_qoy#13] + +(14) CometFilter +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(15) CometProject +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Arguments: [d_date_sk#11], [d_date_sk#11] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: [d_date_sk#11] + +(17) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#5], [d_date_sk#11], Inner, BuildRight + +(18) CometProject +Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10, d_date_sk#11] +Arguments: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10], [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#14, i_item_id#15] +Arguments: [i_item_sk#14, i_item_id#15] + +(20) CometFilter +Input [2]: [i_item_sk#14, i_item_id#15] +Condition : isnotnull(i_item_sk#14) + +(21) CometBroadcastExchange +Input [2]: [i_item_sk#14, i_item_id#15] +Arguments: [i_item_sk#14, i_item_id#15] + +(22) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] +Right output [2]: [i_item_sk#14, i_item_id#15] +Arguments: [ws_item_sk#2], [i_item_sk#14], Inner, BuildRight + +(23) CometProject +Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10, i_item_sk#14, i_item_id#15] +Arguments: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15], [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] + +(24) ColumnarToRow [codegen id : 2] +Input [4]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] + +(25) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#16, i_item_id#17] +Arguments: [i_item_sk#16, i_item_id#17] + +(26) CometFilter +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : i_item_sk#16 IN (2,3,5,7,11,13,17,19,23,29) + +(27) CometProject +Input [2]: [i_item_sk#16, i_item_id#17] +Arguments: [i_item_id#17], [i_item_id#17] + +(28) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_id#17] + +(29) BroadcastExchange +Input [1]: [i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=1] + +(30) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [i_item_id#15] +Right keys [1]: [i_item_id#17] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(31) Filter [codegen id : 2] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] +Condition : (substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(32) Project [codegen id : 2] +Output [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] + +(33) HashAggregate [codegen id : 2] +Input [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ca_zip#10, ca_city#9, sum#19] + +(34) Exchange +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Arguments: hashpartitioning(ca_zip#10, ca_city#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(35) HashAggregate [codegen id : 3] +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#20] +Results [3]: [ca_zip#10, ca_city#9, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#20,17,2) AS sum(ws_sales_price)#21] + +(36) TakeOrderedAndProject +Input [3]: [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] +Arguments: 100, [ca_zip#10 ASC NULLS FIRST, ca_city#9 ASC NULLS FIRST], [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/simplified.txt new file mode 100644 index 0000000000..68d48dbfa2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q45.native_datafusion/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen (3) + HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen (2) + HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + ColumnarToRow + InputAdapter + CometProject [ws_sales_price,ca_city,ca_zip,i_item_id] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ca_city,ca_zip,i_item_sk,i_item_id] + CometProject [ws_item_sk,ws_sales_price,ca_city,ca_zip] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip,d_date_sk] + CometProject [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk,ca_address_sk,ca_city,ca_zip] + CometProject [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk,c_customer_sk,c_current_addr_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk,ca_city,ca_zip] #3 + CometFilter [ca_address_sk,ca_city,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city,ca_zip] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [i_item_id] + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/explain.txt new file mode 100644 index 0000000000..c287a20249 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/explain.txt @@ -0,0 +1,224 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * HashAggregate (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- * ColumnarToRow (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (20) + : : : +- CometBroadcastHashJoin (19) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : : : +- CometBroadcastExchange (18) + : : : +- CometProject (17) + : : : +- CometFilter (16) + : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + : : +- CometBroadcastExchange (23) + : : +- CometFilter (22) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (21) + : +- BroadcastExchange (33) + : +- * ColumnarToRow (32) + : +- CometFilter (31) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (30) + +- BroadcastExchange (39) + +- * ColumnarToRow (38) + +- CometFilter (37) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (36) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9, d_year#10, d_dow#11] + +(4) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(5) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#12, s_city#13] +Arguments: [s_store_sk#12, s_city#13] + +(10) CometFilter +Input [2]: [s_store_sk#12, s_city#13] +Condition : (s_city#13 IN (Fairview,Midway) AND isnotnull(s_store_sk#12)) + +(11) CometProject +Input [2]: [s_store_sk#12, s_city#13] +Arguments: [s_store_sk#12], [s_store_sk#12] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: [s_store_sk#12] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [1]: [s_store_sk#12] +Arguments: [ss_store_sk#4], [s_store_sk#12], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Arguments: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] + +(16) CometFilter +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) + +(17) CometProject +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Arguments: [hd_demo_sk#14], [hd_demo_sk#14] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#14] +Arguments: [hd_demo_sk#14] + +(19) CometBroadcastHashJoin +Left output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [1]: [hd_demo_sk#14] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#14], Inner, BuildRight + +(20) CometProject +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(21) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ca_address_sk#17, ca_city#18] + +(22) CometFilter +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) + +(23) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ca_address_sk#17, ca_city#18] + +(24) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [2]: [ca_address_sk#17, ca_city#18] +Arguments: [ss_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#17, ca_city#18] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] + +(26) ColumnarToRow [codegen id : 1] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] + +(27) HashAggregate [codegen id : 1] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum#19, sum#20] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] + +(28) Exchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(29) HashAggregate [codegen id : 4] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#23, sum(UnscaledValue(ss_net_profit#7))#24] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#18 AS bought_city#25, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#23,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#24,17,2) AS profit#27] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Arguments: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(31) CometFilter +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Condition : (isnotnull(c_customer_sk#28) AND isnotnull(c_current_addr_sk#29)) + +(32) ColumnarToRow [codegen id : 2] +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(33) BroadcastExchange +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(34) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#28] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 4] +Output [7]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27, c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(36) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#32, ca_city#33] +Arguments: [ca_address_sk#32, ca_city#33] + +(37) CometFilter +Input [2]: [ca_address_sk#32, ca_city#33] +Condition : (isnotnull(ca_address_sk#32) AND isnotnull(ca_city#33)) + +(38) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#32, ca_city#33] + +(39) BroadcastExchange +Input [2]: [ca_address_sk#32, ca_city#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(40) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#29] +Right keys [1]: [ca_address_sk#32] +Join type: Inner +Join condition: NOT (ca_city#33 = bought_city#25) + +(41) Project [codegen id : 4] +Output [7]: [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Input [9]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#29, c_first_name#30, c_last_name#31, ca_address_sk#32, ca_city#33] + +(42) TakeOrderedAndProject +Input [7]: [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Arguments: 100, [c_last_name#31 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, ca_city#33 ASC NULLS FIRST, bought_city#25 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/simplified.txt new file mode 100644 index 0000000000..708b8bb3ec --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q46.native_datafusion/simplified.txt @@ -0,0 +1,52 @@ +TakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + WholeStageCodegen (4) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),bought_city,amt,profit,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (1) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] + CometBroadcastHashJoin [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_address_sk,ca_city] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dow] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dow] + CometBroadcastExchange [s_store_sk] #3 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_city] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [ca_address_sk,ca_city] #5 + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/explain.txt new file mode 100644 index 0000000000..fe442b233c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- Exchange (20) + : : +- * HashAggregate (19) + : : +- * ColumnarToRow (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- Window (33) + : +- * Sort (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (42) + +- * Project (41) + +- Window (40) + +- * Sort (39) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Arguments: [i_item_sk#1, i_brand#2, i_category#3] + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_item_sk#1], [ss_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(14) CometFilter +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(15) CometBroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Right output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [ss_store_sk#5], [s_store_sk#11], Inner, BuildRight + +(17) CometProject +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13], [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] + +(18) ColumnarToRow [codegen id : 1] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] + +(19) HashAggregate [codegen id : 1] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#14] +Results [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] + +(20) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(21) HashAggregate [codegen id : 2] +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#16] +Results [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS _w0#18] + +(22) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(23) Sort [codegen id : 3] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(24) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 4] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(27) Filter [codegen id : 13] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN ((abs((sum_sales#17 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END) + +(28) Project [codegen id : 13] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] + +(29) ReusedExchange [Reuses operator id: 20] +Output [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] + +(30) HashAggregate [codegen id : 6] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] +Keys [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26] +Functions [1]: [sum(UnscaledValue(ss_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#28))#16] +Results [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#28))#16,17,2) AS sum_sales#29] + +(31) Exchange +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#29] +Arguments: hashpartitioning(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(32) Sort [codegen id : 7] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#29] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, s_store_name#23 ASC NULLS FIRST, s_company_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST], false, 0 + +(33) Window +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#29] +Arguments: [rank(d_year#25, d_moy#26) windowspecdefinition(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#30], [i_category#21, i_brand#22, s_store_name#23, s_company_name#24], [d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST] + +(34) Project [codegen id : 8] +Output [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#29, rn#30] +Input [8]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#29, rn#30] + +(35) BroadcastExchange +Input [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#29, rn#30] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] + +(36) BroadcastHashJoin [codegen id : 13] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, (rn#30 + 1)] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 13] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#29, rn#30] + +(38) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#37] + +(39) Sort [codegen id : 11] +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#37] +Arguments: [i_category#31 ASC NULLS FIRST, i_brand#32 ASC NULLS FIRST, s_store_name#33 ASC NULLS FIRST, s_company_name#34 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +(40) Window +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#37] +Arguments: [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#38], [i_category#31, i_brand#32, s_store_name#33, s_company_name#34], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +(41) Project [codegen id : 12] +Output [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#37, rn#38] +Input [8]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#37, rn#38] + +(42) BroadcastExchange +Input [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#37, rn#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, (rn#38 - 1)] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 13] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, sum_sales#29 AS psum#39, sum_sales#37 AS nsum#40] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29, i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#37, rn#38] + +(45) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] +Arguments: 100, [(sum_sales#17 - avg_monthly_sales#20) ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/simplified.txt new file mode 100644 index 0000000000..429e65385e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q47.native_datafusion/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] + WholeStageCodegen (13) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (4) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (3) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (2) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + WholeStageCodegen (1) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (7) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen (6) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (12) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (11) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/explain.txt new file mode 100644 index 0000000000..97925f1aca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +* HashAggregate (28) ++- Exchange (27) + +- * HashAggregate (26) + +- * ColumnarToRow (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (13) + +- CometBroadcastExchange (22) + +- CometProject (21) + +- CometFilter (20) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (19) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((((isnotnull(ss_store_sk#3) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_addr_sk#2)) AND ((((ss_sales_price#5 >= 100.00) AND (ss_sales_price#5 <= 150.00)) OR ((ss_sales_price#5 >= 50.00) AND (ss_sales_price#5 <= 100.00))) OR ((ss_sales_price#5 >= 150.00) AND (ss_sales_price#5 <= 200.00)))) AND ((((ss_net_profit#6 >= 0.00) AND (ss_net_profit#6 <= 2000.00)) OR ((ss_net_profit#6 >= 150.00) AND (ss_net_profit#6 <= 3000.00))) OR ((ss_net_profit#6 >= 50.00) AND (ss_net_profit#6 <= 25000.00)))) + +(3) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#8] +Arguments: [s_store_sk#8] + +(4) CometFilter +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(5) CometBroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: [s_store_sk#8] + +(6) CometBroadcastHashJoin +Left output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [1]: [s_store_sk#8] +Arguments: [ss_store_sk#3], [s_store_sk#8], Inner, BuildRight + +(7) CometProject +Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#8] +Arguments: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(9) CometFilter +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Condition : (isnotnull(cd_demo_sk#9) AND ((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) OR ((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree ))) OR ((cd_marital_status#10 = S) AND (cd_education_status#11 = College )))) + +(10) CometBroadcastExchange +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(11) CometBroadcastHashJoin +Left output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [ss_cdemo_sk#1], [cd_demo_sk#9], Inner, ((((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#10 = S) AND (cd_education_status#11 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))), BuildRight + +(12) CometProject +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7], [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13, ca_country#14] + +(14) CometFilter +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (CO,OH,TX) OR ca_state#13 IN (OR,MN,KY)) OR ca_state#13 IN (VA,CA,MS))) + +(15) CometProject +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Arguments: [ca_address_sk#12, ca_state#13], [ca_address_sk#12, ca_state#13] + +(16) CometBroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ca_address_sk#12, ca_state#13] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] +Right output [2]: [ca_address_sk#12, ca_state#13] +Arguments: [ss_addr_sk#2], [ca_address_sk#12], Inner, ((((ca_state#13 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#13 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#13 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))), BuildRight + +(18) CometProject +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#12, ca_state#13] +Arguments: [ss_quantity#4, ss_sold_date_sk#7], [ss_quantity#4, ss_sold_date_sk#7] + +(19) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15, d_year#16] + +(20) CometFilter +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(21) CometProject +Input [2]: [d_date_sk#15, d_year#16] +Arguments: [d_date_sk#15], [d_date_sk#15] + +(22) CometBroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: [d_date_sk#15] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_quantity#4, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#15] +Arguments: [ss_sold_date_sk#7], [d_date_sk#15], Inner, BuildRight + +(24) CometProject +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#15] +Arguments: [ss_quantity#4], [ss_quantity#4] + +(25) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#4] + +(26) HashAggregate [codegen id : 1] +Input [1]: [ss_quantity#4] +Keys: [] +Functions [1]: [partial_sum(ss_quantity#4)] +Aggregate Attributes [1]: [sum#17] +Results [1]: [sum#18] + +(27) Exchange +Input [1]: [sum#18] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] + +(28) HashAggregate [codegen id : 2] +Input [1]: [sum#18] +Keys: [] +Functions [1]: [sum(ss_quantity#4)] +Aggregate Attributes [1]: [sum(ss_quantity#4)#19] +Results [1]: [sum(ss_quantity#4)#19 AS sum(ss_quantity)#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/simplified.txt new file mode 100644 index 0000000000..73c1d61bf6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q48.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +WholeStageCodegen (2) + HashAggregate [sum] [sum(ss_quantity),sum(ss_quantity),sum] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_quantity] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_quantity] + CometBroadcastHashJoin [ss_quantity,ss_sold_date_sk,d_date_sk] + CometProject [ss_quantity,ss_sold_date_sk] + CometBroadcastHashJoin [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk,ca_address_sk,ca_state] + CometProject [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastHashJoin [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk,s_store_sk] + CometFilter [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #3 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [ca_address_sk,ca_state] #4 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/explain.txt new file mode 100644 index 0000000000..9aa373d6a8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/explain.txt @@ -0,0 +1,237 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- Union (41) + :- * Project (26) + : +- * Filter (25) + : +- Window (24) + : +- * Sort (23) + : +- Window (22) + : +- * Sort (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * ColumnarToRow (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometBroadcastExchange (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (5) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + :- * Project (33) + : +- * Filter (32) + : +- Window (31) + : +- * Sort (30) + : +- Window (29) + : +- * Sort (28) + : +- ReusedExchange (27) + +- * Project (40) + +- * Filter (39) + +- Window (38) + +- * Sort (37) + +- Window (36) + +- * Sort (35) + +- ReusedExchange (34) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(3) CometProject +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6], [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(4) CometBroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(5) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(6) CometFilter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(7) CometProject +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10], [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(8) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Right output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_order_number#2, ws_item_sk#1], [wr_order_number#8, wr_item_sk#7], Inner, BuildLeft + +(9) CometProject +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12, d_year#13, d_moy#14] + +(11) CometFilter +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(12) CometProject +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(14) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Right output [1]: [d_date_sk#12] +Arguments: [ws_sold_date_sk#6], [d_date_sk#12], Inner, BuildRight + +(15) CometProject +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] + +(16) ColumnarToRow [codegen id : 1] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] + +(17) HashAggregate [codegen id : 1] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] + +(18) Exchange +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(19) HashAggregate [codegen id : 2] +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#9, 0))#27, sum(coalesce(ws_quantity#3, 0))#28, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30] +Results [3]: [ws_item_sk#1 AS item#31, (cast(sum(coalesce(wr_return_quantity#9, 0))#27 as decimal(15,4)) / cast(sum(coalesce(ws_quantity#3, 0))#28 as decimal(15,4))) AS return_ratio#32, (cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29 as decimal(15,4)) / cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30 as decimal(15,4))) AS currency_ratio#33] + +(20) Exchange +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(21) Sort [codegen id : 3] +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [return_ratio#32 ASC NULLS FIRST], false, 0 + +(22) Window +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [rank(return_ratio#32) windowspecdefinition(return_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#34], [return_ratio#32 ASC NULLS FIRST] + +(23) Sort [codegen id : 4] +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [currency_ratio#33 ASC NULLS FIRST], false, 0 + +(24) Window +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [rank(currency_ratio#33) windowspecdefinition(currency_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#35], [currency_ratio#33 ASC NULLS FIRST] + +(25) Filter [codegen id : 5] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] +Condition : ((return_rank#34 <= 10) OR (currency_rank#35 <= 10)) + +(26) Project [codegen id : 5] +Output [5]: [web AS channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] + +(27) ReusedExchange [Reuses operator id: 20] +Output [3]: [item#37, return_ratio#38, currency_ratio#39] + +(28) Sort [codegen id : 8] +Input [3]: [item#37, return_ratio#38, currency_ratio#39] +Arguments: [return_ratio#38 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [item#37, return_ratio#38, currency_ratio#39] +Arguments: [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#40], [return_ratio#38 ASC NULLS FIRST] + +(30) Sort [codegen id : 9] +Input [4]: [item#37, return_ratio#38, currency_ratio#39, return_rank#40] +Arguments: [currency_ratio#39 ASC NULLS FIRST], false, 0 + +(31) Window +Input [4]: [item#37, return_ratio#38, currency_ratio#39, return_rank#40] +Arguments: [rank(currency_ratio#39) windowspecdefinition(currency_ratio#39 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#41], [currency_ratio#39 ASC NULLS FIRST] + +(32) Filter [codegen id : 10] +Input [5]: [item#37, return_ratio#38, currency_ratio#39, return_rank#40, currency_rank#41] +Condition : ((return_rank#40 <= 10) OR (currency_rank#41 <= 10)) + +(33) Project [codegen id : 10] +Output [5]: [catalog AS channel#42, item#37, return_ratio#38, return_rank#40, currency_rank#41] +Input [5]: [item#37, return_ratio#38, currency_ratio#39, return_rank#40, currency_rank#41] + +(34) ReusedExchange [Reuses operator id: 20] +Output [3]: [item#43, return_ratio#44, currency_ratio#45] + +(35) Sort [codegen id : 13] +Input [3]: [item#43, return_ratio#44, currency_ratio#45] +Arguments: [return_ratio#44 ASC NULLS FIRST], false, 0 + +(36) Window +Input [3]: [item#43, return_ratio#44, currency_ratio#45] +Arguments: [rank(return_ratio#44) windowspecdefinition(return_ratio#44 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#46], [return_ratio#44 ASC NULLS FIRST] + +(37) Sort [codegen id : 14] +Input [4]: [item#43, return_ratio#44, currency_ratio#45, return_rank#46] +Arguments: [currency_ratio#45 ASC NULLS FIRST], false, 0 + +(38) Window +Input [4]: [item#43, return_ratio#44, currency_ratio#45, return_rank#46] +Arguments: [rank(currency_ratio#45) windowspecdefinition(currency_ratio#45 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#47], [currency_ratio#45 ASC NULLS FIRST] + +(39) Filter [codegen id : 15] +Input [5]: [item#43, return_ratio#44, currency_ratio#45, return_rank#46, currency_rank#47] +Condition : ((return_rank#46 <= 10) OR (currency_rank#47 <= 10)) + +(40) Project [codegen id : 15] +Output [5]: [store AS channel#48, item#43, return_ratio#44, return_rank#46, currency_rank#47] +Input [5]: [item#43, return_ratio#44, currency_ratio#45, return_rank#46, currency_rank#47] + +(41) Union + +(42) HashAggregate [codegen id : 16] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(43) Exchange +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: hashpartitioning(channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(44) HashAggregate [codegen id : 17] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(45) TakeOrderedAndProject +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: 100, [channel#36 ASC NULLS FIRST, return_rank#34 ASC NULLS FIRST, currency_rank#35 ASC NULLS FIRST], [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/simplified.txt new file mode 100644 index 0000000000..36e05fccb5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q49.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (17) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (16) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (5) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (4) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (3) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (1) + HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt,d_date_sk] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometBroadcastExchange [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] #4 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + WholeStageCodegen (10) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (9) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (8) + Sort [return_ratio] + InputAdapter + ReusedExchange [item,return_ratio,currency_ratio] #2 + WholeStageCodegen (15) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (14) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (13) + Sort [return_ratio] + InputAdapter + ReusedExchange [item,return_ratio,currency_ratio] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/explain.txt new file mode 100644 index 0000000000..b4ec3d7599 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/explain.txt @@ -0,0 +1,277 @@ +== Physical Plan == +TakeOrderedAndProject (51) ++- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Expand (47) + +- Union (46) + :- * HashAggregate (22) + : +- Exchange (21) + : +- * HashAggregate (20) + : +- * ColumnarToRow (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometUnion (7) + : : : :- CometProject (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (4) + : : +- CometBroadcastExchange (11) + : : +- CometProject (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + :- * HashAggregate (24) + : +- ReusedExchange (23) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * ColumnarToRow (42) + +- CometProject (41) + +- CometBroadcastHashJoin (40) + :- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometUnion (35) + : : :- CometProject (27) + : : : +- CometFilter (26) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (25) + : : +- CometProject (34) + : : +- CometBroadcastHashJoin (33) + : : :- CometBroadcastExchange (29) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (28) + : : +- CometProject (32) + : : +- CometFilter (31) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (30) + : +- ReusedExchange (36) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10], [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(5) CometFilter +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(6) CometProject +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20], [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] + +(7) CometUnion +Child 0 Input [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Child 1 Input [6]: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21, d_date#22] + +(9) CometFilter +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-08-23)) AND (d_date#22 <= 2000-09-06)) AND isnotnull(d_date_sk#21)) + +(10) CometProject +Input [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(12) CometBroadcastHashJoin +Left output [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [1]: [d_date_sk#21] +Arguments: [date_sk#6], [d_date_sk#21], Inner, BuildRight + +(13) CometProject +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +Arguments: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10], [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(15) CometFilter +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(16) CometBroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(17) CometBroadcastHashJoin +Left output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [store_sk#5], [s_store_sk#23], Inner, BuildRight + +(18) CometProject +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] +Arguments: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24], [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] + +(19) ColumnarToRow [codegen id : 1] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] + +(20) HashAggregate [codegen id : 1] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] + +(21) Exchange +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(22) HashAggregate [codegen id : 2] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#33, sum(UnscaledValue(return_amt#9))#34, sum(UnscaledValue(profit#8))#35, sum(UnscaledValue(net_loss#10))#36] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#33,17,2) AS sales#37, MakeDecimal(sum(UnscaledValue(return_amt#9))#34,17,2) AS returns#38, (MakeDecimal(sum(UnscaledValue(profit#8))#35,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#36,17,2)) AS profit#39, store channel AS channel#40, concat(store, s_store_id#24) AS id#41] + +(23) ReusedExchange [Reuses operator id: 21] +Output [5]: [cp_catalog_page_id#42, sum#43, sum#44, sum#45, sum#46] + +(24) HashAggregate [codegen id : 4] +Input [5]: [cp_catalog_page_id#42, sum#43, sum#44, sum#45, sum#46] +Keys [1]: [cp_catalog_page_id#42] +Functions [4]: [sum(UnscaledValue(sales_price#47)), sum(UnscaledValue(return_amt#48)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#50))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#47))#51, sum(UnscaledValue(return_amt#48))#52, sum(UnscaledValue(profit#49))#53, sum(UnscaledValue(net_loss#50))#54] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#47))#51,17,2) AS sales#55, MakeDecimal(sum(UnscaledValue(return_amt#48))#52,17,2) AS returns#56, (MakeDecimal(sum(UnscaledValue(profit#49))#53,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#50))#54,17,2)) AS profit#57, catalog channel AS channel#58, concat(catalog_page, cp_catalog_page_id#42) AS id#59] + +(25) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_web_site_sk#60, ws_ext_sales_price#61, ws_net_profit#62, ws_sold_date_sk#63] +Arguments: [ws_web_site_sk#60, ws_ext_sales_price#61, ws_net_profit#62, ws_sold_date_sk#63] + +(26) CometFilter +Input [4]: [ws_web_site_sk#60, ws_ext_sales_price#61, ws_net_profit#62, ws_sold_date_sk#63] +Condition : isnotnull(ws_web_site_sk#60) + +(27) CometProject +Input [4]: [ws_web_site_sk#60, ws_ext_sales_price#61, ws_net_profit#62, ws_sold_date_sk#63] +Arguments: [wsr_web_site_sk#64, date_sk#65, sales_price#66, profit#67, return_amt#68, net_loss#69], [ws_web_site_sk#60 AS wsr_web_site_sk#64, ws_sold_date_sk#63 AS date_sk#65, ws_ext_sales_price#61 AS sales_price#66, ws_net_profit#62 AS profit#67, 0.00 AS return_amt#68, 0.00 AS net_loss#69] + +(28) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [5]: [wr_item_sk#70, wr_order_number#71, wr_return_amt#72, wr_net_loss#73, wr_returned_date_sk#74] +Arguments: [wr_item_sk#70, wr_order_number#71, wr_return_amt#72, wr_net_loss#73, wr_returned_date_sk#74] + +(29) CometBroadcastExchange +Input [5]: [wr_item_sk#70, wr_order_number#71, wr_return_amt#72, wr_net_loss#73, wr_returned_date_sk#74] +Arguments: [wr_item_sk#70, wr_order_number#71, wr_return_amt#72, wr_net_loss#73, wr_returned_date_sk#74] + +(30) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77, ws_sold_date_sk#78] +Arguments: [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77, ws_sold_date_sk#78] + +(31) CometFilter +Input [4]: [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77, ws_sold_date_sk#78] +Condition : ((isnotnull(ws_item_sk#75) AND isnotnull(ws_order_number#77)) AND isnotnull(ws_web_site_sk#76)) + +(32) CometProject +Input [4]: [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77, ws_sold_date_sk#78] +Arguments: [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77], [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77] + +(33) CometBroadcastHashJoin +Left output [5]: [wr_item_sk#70, wr_order_number#71, wr_return_amt#72, wr_net_loss#73, wr_returned_date_sk#74] +Right output [3]: [ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77] +Arguments: [wr_item_sk#70, wr_order_number#71], [ws_item_sk#75, ws_order_number#77], Inner, BuildLeft + +(34) CometProject +Input [8]: [wr_item_sk#70, wr_order_number#71, wr_return_amt#72, wr_net_loss#73, wr_returned_date_sk#74, ws_item_sk#75, ws_web_site_sk#76, ws_order_number#77] +Arguments: [wsr_web_site_sk#79, date_sk#80, sales_price#81, profit#82, return_amt#83, net_loss#84], [ws_web_site_sk#76 AS wsr_web_site_sk#79, wr_returned_date_sk#74 AS date_sk#80, 0.00 AS sales_price#81, 0.00 AS profit#82, wr_return_amt#72 AS return_amt#83, wr_net_loss#73 AS net_loss#84] + +(35) CometUnion +Child 0 Input [6]: [wsr_web_site_sk#64, date_sk#65, sales_price#66, profit#67, return_amt#68, net_loss#69] +Child 1 Input [6]: [wsr_web_site_sk#79, date_sk#80, sales_price#81, profit#82, return_amt#83, net_loss#84] + +(36) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#85] + +(37) CometBroadcastHashJoin +Left output [6]: [wsr_web_site_sk#64, date_sk#65, sales_price#66, profit#67, return_amt#68, net_loss#69] +Right output [1]: [d_date_sk#85] +Arguments: [date_sk#65], [d_date_sk#85], Inner, BuildRight + +(38) CometProject +Input [7]: [wsr_web_site_sk#64, date_sk#65, sales_price#66, profit#67, return_amt#68, net_loss#69, d_date_sk#85] +Arguments: [wsr_web_site_sk#64, sales_price#66, profit#67, return_amt#68, net_loss#69], [wsr_web_site_sk#64, sales_price#66, profit#67, return_amt#68, net_loss#69] + +(39) ReusedExchange [Reuses operator id: 16] +Output [2]: [web_site_sk#86, web_site_id#87] + +(40) CometBroadcastHashJoin +Left output [5]: [wsr_web_site_sk#64, sales_price#66, profit#67, return_amt#68, net_loss#69] +Right output [2]: [web_site_sk#86, web_site_id#87] +Arguments: [wsr_web_site_sk#64], [web_site_sk#86], Inner, BuildRight + +(41) CometProject +Input [7]: [wsr_web_site_sk#64, sales_price#66, profit#67, return_amt#68, net_loss#69, web_site_sk#86, web_site_id#87] +Arguments: [sales_price#66, profit#67, return_amt#68, net_loss#69, web_site_id#87], [sales_price#66, profit#67, return_amt#68, net_loss#69, web_site_id#87] + +(42) ColumnarToRow [codegen id : 5] +Input [5]: [sales_price#66, profit#67, return_amt#68, net_loss#69, web_site_id#87] + +(43) HashAggregate [codegen id : 5] +Input [5]: [sales_price#66, profit#67, return_amt#68, net_loss#69, web_site_id#87] +Keys [1]: [web_site_id#87] +Functions [4]: [partial_sum(UnscaledValue(sales_price#66)), partial_sum(UnscaledValue(return_amt#68)), partial_sum(UnscaledValue(profit#67)), partial_sum(UnscaledValue(net_loss#69))] +Aggregate Attributes [4]: [sum#88, sum#89, sum#90, sum#91] +Results [5]: [web_site_id#87, sum#92, sum#93, sum#94, sum#95] + +(44) Exchange +Input [5]: [web_site_id#87, sum#92, sum#93, sum#94, sum#95] +Arguments: hashpartitioning(web_site_id#87, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(45) HashAggregate [codegen id : 6] +Input [5]: [web_site_id#87, sum#92, sum#93, sum#94, sum#95] +Keys [1]: [web_site_id#87] +Functions [4]: [sum(UnscaledValue(sales_price#66)), sum(UnscaledValue(return_amt#68)), sum(UnscaledValue(profit#67)), sum(UnscaledValue(net_loss#69))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#66))#96, sum(UnscaledValue(return_amt#68))#97, sum(UnscaledValue(profit#67))#98, sum(UnscaledValue(net_loss#69))#99] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#66))#96,17,2) AS sales#100, MakeDecimal(sum(UnscaledValue(return_amt#68))#97,17,2) AS returns#101, (MakeDecimal(sum(UnscaledValue(profit#67))#98,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#69))#99,17,2)) AS profit#102, web channel AS channel#103, concat(web_site, web_site_id#87) AS id#104] + +(46) Union + +(47) Expand [codegen id : 7] +Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] +Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#105, id#106, spark_grouping_id#107] + +(48) HashAggregate [codegen id : 7] +Input [6]: [sales#37, returns#38, profit#39, channel#105, id#106, spark_grouping_id#107] +Keys [3]: [channel#105, id#106, spark_grouping_id#107] +Functions [3]: [partial_sum(sales#37), partial_sum(returns#38), partial_sum(profit#39)] +Aggregate Attributes [6]: [sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Results [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] + +(49) Exchange +Input [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] +Arguments: hashpartitioning(channel#105, id#106, spark_grouping_id#107, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(50) HashAggregate [codegen id : 8] +Input [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] +Keys [3]: [channel#105, id#106, spark_grouping_id#107] +Functions [3]: [sum(sales#37), sum(returns#38), sum(profit#39)] +Aggregate Attributes [3]: [sum(sales#37)#120, sum(returns#38)#121, sum(profit#39)#122] +Results [5]: [channel#105, id#106, sum(sales#37)#120 AS sales#123, sum(returns#38)#121 AS returns#124, sum(profit#39)#122 AS profit#125] + +(51) TakeOrderedAndProject +Input [5]: [channel#105, id#106, sales#123, returns#124, profit#125] +Arguments: 100, [channel#105 ASC NULLS FIRST, id#106 ASC NULLS FIRST], [channel#105, id#106, sales#123, returns#124, profit#125] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/simplified.txt new file mode 100644 index 0000000000..11227fb08a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q5.native_datafusion/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (8) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (2) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (1) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [sales_price,profit,return_amt,net_loss,s_store_id] + CometBroadcastHashJoin [store_sk,sales_price,profit,return_amt,net_loss,s_store_sk,s_store_id] + CometProject [store_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [store_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometProject [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + WholeStageCodegen (4) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + ReusedExchange [cp_catalog_page_id,sum,sum,sum,sum] #2 + WholeStageCodegen (6) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #5 + WholeStageCodegen (5) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [sales_price,profit,return_amt,net_loss,web_site_id] + CometBroadcastHashJoin [wsr_web_site_sk,sales_price,profit,return_amt,net_loss,web_site_sk,web_site_id] + CometProject [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk,ws_item_sk,ws_web_site_sk,ws_order_number] + CometBroadcastExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] #6 + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_order_number] + CometFilter [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [web_site_sk,web_site_id] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/explain.txt new file mode 100644 index 0000000000..5260461358 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/explain.txt @@ -0,0 +1,152 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * ColumnarToRow (24) + +- CometProject (23) + +- CometBroadcastHashJoin (22) + :- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (8) + : +- CometBroadcastExchange (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- CometBroadcastExchange (21) + +- CometProject (20) + +- CometFilter (19) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (18) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_store_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(4) CometFilter +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : ((isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#6)) AND isnotnull(sr_customer_sk#7)) + +(5) CometBroadcastExchange +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(6) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [ss_ticket_number#4, ss_item_sk#1, ss_customer_sk#2], [sr_ticket_number#8, sr_item_sk#6, sr_customer_sk#7], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9], [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`store` +Output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(9) CometFilter +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Condition : isnotnull(s_store_sk#10) + +(10) CometBroadcastExchange +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(11) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] +Right output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(12) CometProject +Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(14) CometFilter +Input [1]: [d_date_sk#21] +Condition : isnotnull(d_date_sk#21) + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(16) CometBroadcastHashJoin +Left output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Right output [1]: [d_date_sk#21] +Arguments: [ss_sold_date_sk#5], [d_date_sk#21], Inner, BuildRight + +(17) CometProject +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#21] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(18) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Arguments: [d_date_sk#22, d_year#23, d_moy#24] + +(19) CometFilter +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : ((((isnotnull(d_year#23) AND isnotnull(d_moy#24)) AND (d_year#23 = 2001)) AND (d_moy#24 = 8)) AND isnotnull(d_date_sk#22)) + +(20) CometProject +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Arguments: [d_date_sk#22], [d_date_sk#22] + +(21) CometBroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: [d_date_sk#22] + +(22) CometBroadcastHashJoin +Left output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Right output [1]: [d_date_sk#22] +Arguments: [sr_returned_date_sk#9], [d_date_sk#22], Inner, BuildRight + +(23) CometProject +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#22] +Arguments: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20], [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(24) ColumnarToRow [codegen id : 1] +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(25) HashAggregate [codegen id : 1] +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#25, sum#26, sum#27, sum#28, sum#29] +Results [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] + +(26) Exchange +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(27) HashAggregate [codegen id : 2] +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#35, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#36, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#37, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#38, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#39] +Results [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#35 AS 30 days #40, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#36 AS 31 - 60 days #41, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#37 AS 61 - 90 days #42, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#38 AS 91 - 120 days #43, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#39 AS >120 days #44] + +(28) TakeOrderedAndProject +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #40, 31 - 60 days #41, 61 - 90 days #42, 91 - 120 days #43, >120 days #44] +Arguments: 100, [s_store_name#11 ASC NULLS FIRST, s_company_id#12 ASC NULLS FIRST, s_street_number#13 ASC NULLS FIRST, s_street_name#14 ASC NULLS FIRST, s_street_type#15 ASC NULLS FIRST, s_suite_number#16 ASC NULLS FIRST, s_city#17 ASC NULLS FIRST, s_county#18 ASC NULLS FIRST, s_state#19 ASC NULLS FIRST, s_zip#20 ASC NULLS FIRST], [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #40, 31 - 60 days #41, 61 - 90 days #42, 91 - 120 days #43, >120 days #44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8b0db61368 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q50.native_datafusion/simplified.txt @@ -0,0 +1,32 @@ +TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (2) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 30) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 60) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 90) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 + WholeStageCodegen (1) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,d_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,d_date_sk] + CometProject [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastHashJoin [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk,s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometProject [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] #2 + CometFilter [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #3 + CometFilter [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + CometBroadcastExchange [d_date_sk] #4 + CometFilter [d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/explain.txt new file mode 100644 index 0000000000..8dd829b3cc --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/explain.txt @@ -0,0 +1,145 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Filter (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * Project (22) + +- * SortMergeJoin FullOuter (21) + :- * Sort (18) + : +- Exchange (17) + : +- * Project (16) + : +- Window (15) + : +- * Sort (14) + : +- Exchange (13) + : +- * HashAggregate (12) + : +- Exchange (11) + : +- * HashAggregate (10) + : +- * ColumnarToRow (9) + : +- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + +- * Sort (20) + +- ReusedExchange (19) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5, d_month_seq#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5], [d_date_sk#4, d_date#5] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: [d_date_sk#4, d_date#5] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Right output [2]: [d_date_sk#4, d_date#5] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] +Arguments: [ws_item_sk#1, ws_sales_price#2, d_date#5], [ws_item_sk#1, ws_sales_price#2, d_date#5] + +(9) ColumnarToRow [codegen id : 1] +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] + +(10) HashAggregate [codegen id : 1] +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [ws_item_sk#1, d_date#5, sum#8] + +(11) Exchange +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(12) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS _w0#11, ws_item_sk#1] + +(13) Exchange +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(14) Sort [codegen id : 3] +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(15) Window +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: [sum(_w0#11) windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(16) Project [codegen id : 4] +Output [3]: [item_sk#10, d_date#5, cume_sales#12] +Input [5]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1, cume_sales#12] + +(17) Exchange +Input [3]: [item_sk#10, d_date#5, cume_sales#12] +Arguments: hashpartitioning(item_sk#10, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 5] +Input [3]: [item_sk#10, d_date#5, cume_sales#12] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(19) ReusedExchange [Reuses operator id: 17] +Output [3]: [item_sk#13, d_date#14, cume_sales#15] + +(20) Sort [codegen id : 10] +Input [3]: [item_sk#13, d_date#14, cume_sales#15] +Arguments: [item_sk#13 ASC NULLS FIRST, d_date#14 ASC NULLS FIRST], false, 0 + +(21) SortMergeJoin [codegen id : 11] +Left keys [2]: [item_sk#10, d_date#5] +Right keys [2]: [item_sk#13, d_date#14] +Join type: FullOuter +Join condition: None + +(22) Project [codegen id : 11] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#13 END AS item_sk#16, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#14 END AS d_date#17, cume_sales#12 AS web_sales#18, cume_sales#15 AS store_sales#19] +Input [6]: [item_sk#10, d_date#5, cume_sales#12, item_sk#13, d_date#14, cume_sales#15] + +(23) Exchange +Input [4]: [item_sk#16, d_date#17, web_sales#18, store_sales#19] +Arguments: hashpartitioning(item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 12] +Input [4]: [item_sk#16, d_date#17, web_sales#18, store_sales#19] +Arguments: [item_sk#16 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 + +(25) Window +Input [4]: [item_sk#16, d_date#17, web_sales#18, store_sales#19] +Arguments: [max(web_sales#18) windowspecdefinition(item_sk#16, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#20, max(store_sales#19) windowspecdefinition(item_sk#16, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#21], [item_sk#16], [d_date#17 ASC NULLS FIRST] + +(26) Filter [codegen id : 13] +Input [6]: [item_sk#16, d_date#17, web_sales#18, store_sales#19, web_cumulative#20, store_cumulative#21] +Condition : ((isnotnull(web_cumulative#20) AND isnotnull(store_cumulative#21)) AND (web_cumulative#20 > store_cumulative#21)) + +(27) TakeOrderedAndProject +Input [6]: [item_sk#16, d_date#17, web_sales#18, store_sales#19, web_cumulative#20, store_cumulative#21] +Arguments: 100, [item_sk#16 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], [item_sk#16, d_date#17, web_sales#18, store_sales#19, web_cumulative#20, store_cumulative#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c6a24b17f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q51.native_datafusion/simplified.txt @@ -0,0 +1,46 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (13) + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (11) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (5) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (4) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen (3) + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #4 + WholeStageCodegen (1) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ws_item_sk,ws_sales_price,d_date] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #5 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (10) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,cume_sales] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/explain.txt new file mode 100644 index 0000000000..48652a0632 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/explain.txt @@ -0,0 +1,105 @@ +== Physical Plan == +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * ColumnarToRow (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2], [d_date_sk#1, d_year#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [2]: [d_date_sk#1, d_year#2] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5], [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) ColumnarToRow [codegen id : 1] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(16) HashAggregate [codegen id : 1] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] + +(17) Exchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(18) HashAggregate [codegen id : 2] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] + +(19) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#14, brand#15, ext_price#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, ext_price#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2e1cfd5a58 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q52.native_datafusion/simplified.txt @@ -0,0 +1,23 @@ +TakeOrderedAndProject [d_year,ext_price,brand_id,brand] + WholeStageCodegen (2) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (1) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [d_year,ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [d_year,ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [d_year,ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,d_year,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk,d_year] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/explain.txt new file mode 100644 index 0000000000..eabc3792e1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/explain.txt @@ -0,0 +1,156 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- * Filter (27) + +- Window (26) + +- * Sort (25) + +- Exchange (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * ColumnarToRow (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (14) + : +- CometBroadcastHashJoin (13) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : +- CometBroadcastExchange (12) + : +- CometProject (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + +- CometBroadcastExchange (17) + +- CometFilter (16) + +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,reference ,self-help )) AND i_brand#2 IN (scholaramalgamalg #6 ,scholaramalgamalg #7 ,exportiunivamalg #8 ,scholaramalgamalg #8 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_manufact_id#5], [i_item_sk#1, i_manufact_id#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(5) CometFilter +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(6) CometBroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) CometBroadcastHashJoin +Left output [2]: [i_item_sk#1, i_manufact_id#5] +Right output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_item_sk#1], [ss_item_sk#10], Inner, BuildRight + +(8) CometProject +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13], [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Arguments: [d_date_sk#14, d_month_seq#15, d_qoy#16] + +(10) CometFilter +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(11) CometProject +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Arguments: [d_date_sk#14, d_qoy#16], [d_date_sk#14, d_qoy#16] + +(12) CometBroadcastExchange +Input [2]: [d_date_sk#14, d_qoy#16] +Arguments: [d_date_sk#14, d_qoy#16] + +(13) CometBroadcastHashJoin +Left output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_qoy#16] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(14) CometProject +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_qoy#16] +Arguments: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16], [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(16) CometFilter +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(18) CometBroadcastHashJoin +Left output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Right output [1]: [s_store_sk#17] +Arguments: [ss_store_sk#11], [s_store_sk#17], Inner, BuildRight + +(19) CometProject +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] +Arguments: [i_manufact_id#5, ss_sales_price#12, d_qoy#16], [i_manufact_id#5, ss_sales_price#12, d_qoy#16] + +(20) ColumnarToRow [codegen id : 1] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] + +(21) HashAggregate [codegen id : 1] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#16, sum#19] + +(22) Exchange +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(23) HashAggregate [codegen id : 2] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] + +(24) Exchange +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(25) Sort [codegen id : 3] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(26) Window +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] + +(27) Filter [codegen id : 4] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN ((abs((sum_sales#21 - avg_quarterly_sales#23)) / avg_quarterly_sales#23) > 0.1000000000000000) ELSE false END + +(28) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] + +(29) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ca0948944d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q53.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen (4) + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (3) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (2) + HashAggregate [i_manufact_id,d_qoy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manufact_id,d_qoy] #2 + WholeStageCodegen (1) + HashAggregate [i_manufact_id,d_qoy,ss_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [i_manufact_id,ss_sales_price,d_qoy] + CometBroadcastHashJoin [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy,s_store_sk] + CometProject [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + CometBroadcastHashJoin [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_qoy] + CometProject [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_manufact_id,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [i_item_sk,i_manufact_id] + CometFilter [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_qoy] #4 + CometProject [d_date_sk,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_qoy] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/explain.txt new file mode 100644 index 0000000000..9204cb2bde --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/explain.txt @@ -0,0 +1,105 @@ +== Physical Plan == +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * ColumnarToRow (15) + +- CometProject (14) + +- CometBroadcastHashJoin (13) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (1) + : +- CometBroadcastExchange (6) + : +- CometFilter (5) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + +- CometBroadcastExchange (12) + +- CometProject (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1, d_year#2, d_moy#3] + +(2) CometFilter +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(3) CometProject +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Arguments: [d_date_sk#1], [d_date_sk#1] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(6) CometBroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [1]: [d_date_sk#1] +Right output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [d_date_sk#1], [ss_sold_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#4, ss_ext_sales_price#5], [ss_item_sk#4, ss_ext_sales_price#5] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(10) CometFilter +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 28)) AND isnotnull(i_item_sk#7)) + +(11) CometProject +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9], [i_item_sk#7, i_brand_id#8, i_brand#9] + +(12) CometBroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [i_item_sk#7, i_brand_id#8, i_brand#9] + +(13) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#4, ss_ext_sales_price#5] +Right output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_item_sk#4], [i_item_sk#7], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9], [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(15) ColumnarToRow [codegen id : 1] +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] + +(16) HashAggregate [codegen id : 1] +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_brand#9, i_brand_id#8, sum#12] + +(17) Exchange +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(18) HashAggregate [codegen id : 2] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [3]: [i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] + +(19) TakeOrderedAndProject +Input [3]: [brand_id#14, brand#15, ext_price#16] +Arguments: 100, [ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [brand_id#14, brand#15, ext_price#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/simplified.txt new file mode 100644 index 0000000000..93d3c9b4fb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q55.native_datafusion/simplified.txt @@ -0,0 +1,23 @@ +TakeOrderedAndProject [ext_price,brand_id,brand] + WholeStageCodegen (2) + HashAggregate [i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (1) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,i_brand_id,i_brand] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_brand_id,i_brand] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [d_date_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] #2 + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/explain.txt new file mode 100644 index 0000000000..c9298fb775 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/explain.txt @@ -0,0 +1,273 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * HashAggregate (49) + +- Exchange (48) + +- * HashAggregate (47) + +- Union (46) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * ColumnarToRow (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + :- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * ColumnarToRow (40) + : +- CometProject (39) + : +- CometBroadcastHashJoin (38) + : :- CometProject (36) + : : +- CometBroadcastHashJoin (35) + : : :- CometProject (33) + : : : +- CometBroadcastHashJoin (32) + : : : :- CometFilter (30) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (29) + : : : +- ReusedExchange (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- * HashAggregate (45) + +- ReusedExchange (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5, d_year#6, d_moy#7] + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8, ca_gmt_offset#9] + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(16) CometFilter +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_id#12, i_color#13] +Arguments: [i_item_id#12, i_color#13] + +(18) CometFilter +Input [2]: [i_item_id#12, i_color#13] +Condition : i_color#13 IN (slate ,blanched ,burnished ) + +(19) CometProject +Input [2]: [i_item_id#12, i_color#13] +Arguments: [i_item_id#12], [i_item_id#12] + +(20) CometBroadcastExchange +Input [1]: [i_item_id#12] +Arguments: [i_item_id#12] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_item_id#11] +Right output [1]: [i_item_id#12] +Arguments: [i_item_id#11], [i_item_id#12], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Arguments: [ss_ext_sales_price#3, i_item_id#11], [ss_ext_sales_price#3, i_item_id#11] + +(25) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] + +(26) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_item_id#11, sum#15] + +(27) Exchange +Input [2]: [i_item_id#11, sum#15] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(28) HashAggregate [codegen id : 2] +Input [2]: [i_item_id#11, sum#15] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(29) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Arguments: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(30) CometFilter +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(31) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#22] + +(32) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + +(33) CometProject +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] +Arguments: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20], [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] + +(34) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#23] + +(35) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Right output [1]: [ca_address_sk#23] +Arguments: [cs_bill_addr_sk#18], [ca_address_sk#23], Inner, BuildRight + +(36) CometProject +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] +Arguments: [cs_item_sk#19, cs_ext_sales_price#20], [cs_item_sk#19, cs_ext_sales_price#20] + +(37) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#24, i_item_id#25] + +(38) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Right output [2]: [i_item_sk#24, i_item_id#25] +Arguments: [cs_item_sk#19], [i_item_sk#24], Inner, BuildRight + +(39) CometProject +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_item_id#25] +Arguments: [cs_ext_sales_price#20, i_item_id#25], [cs_ext_sales_price#20, i_item_id#25] + +(40) ColumnarToRow [codegen id : 3] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] + +(41) HashAggregate [codegen id : 3] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] +Keys [1]: [i_item_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_item_id#25, sum#27] + +(42) Exchange +Input [2]: [i_item_id#25, sum#27] +Arguments: hashpartitioning(i_item_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(43) HashAggregate [codegen id : 4] +Input [2]: [i_item_id#25, sum#27] +Keys [1]: [i_item_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(44) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_id#30, sum#31] + +(45) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#30, sum#31] +Keys [1]: [i_item_id#30] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#33] +Results [2]: [i_item_id#30, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#33,17,2) AS total_sales#34] + +(46) Union + +(47) HashAggregate [codegen id : 7] +Input [2]: [i_item_id#11, total_sales#17] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [3]: [i_item_id#11, sum#37, isEmpty#38] + +(48) Exchange +Input [3]: [i_item_id#11, sum#37, isEmpty#38] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(49) HashAggregate [codegen id : 8] +Input [3]: [i_item_id#11, sum#37, isEmpty#38] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#39] +Results [2]: [i_item_id#11, sum(total_sales#17)#39 AS total_sales#40] + +(50) TakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#40] +Arguments: 100, [total_sales#40 ASC NULLS FIRST], [i_item_id#11, total_sales#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/simplified.txt new file mode 100644 index 0000000000..cdddff0dcb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q56.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [total_sales,i_item_id] + WholeStageCodegen (8) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (2) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (1) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_id] + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [i_item_id] #6 + CometProject [i_item_id] + CometFilter [i_item_id,i_color] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_id,i_color] + WholeStageCodegen (4) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (3) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + ReusedExchange [i_item_id,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/explain.txt new file mode 100644 index 0000000000..564f4218e9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- Exchange (20) + : : +- * HashAggregate (19) + : : +- * ColumnarToRow (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`call_center` (13) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- Window (33) + : +- * Sort (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (42) + +- * Project (41) + +- Window (40) + +- * Sort (39) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Arguments: [i_item_sk#1, i_brand#2, i_category#3] + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(4) CometFilter +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(5) CometBroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_item_sk#1], [cs_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [cs_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(14) CometFilter +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(15) CometBroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Right output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cs_call_center_sk#4], [cc_call_center_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] +Arguments: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12], [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] + +(18) ColumnarToRow [codegen id : 1] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] + +(19) HashAggregate [codegen id : 1] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] + +(20) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(21) HashAggregate [codegen id : 2] +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#15] +Results [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS sum_sales#16, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS _w0#17] + +(22) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(23) Sort [codegen id : 3] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(24) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 4] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#19], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(27) Filter [codegen id : 13] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] +Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.000000)) AND CASE WHEN (avg_monthly_sales#19 > 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#19)) / avg_monthly_sales#19) > 0.1000000000000000) END) + +(28) Project [codegen id : 13] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] + +(29) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] + +(30) HashAggregate [codegen id : 6] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] +Keys [5]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(cs_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#26))#15] +Results [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, MakeDecimal(sum(UnscaledValue(cs_sales_price#26))#15,17,2) AS sum_sales#27] + +(31) Exchange +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#27] +Arguments: hashpartitioning(i_category#20, i_brand#21, cc_name#22, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(32) Sort [codegen id : 7] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#27] +Arguments: [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, cc_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST], false, 0 + +(33) Window +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#27] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#20, i_brand#21, cc_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#20, i_brand#21, cc_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(34) Project [codegen id : 8] +Output [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#27, rn#28] +Input [7]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#27, rn#28] + +(35) BroadcastExchange +Input [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#27, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] + +(36) BroadcastHashJoin [codegen id : 13] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#20, i_brand#21, cc_name#22, (rn#28 + 1)] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 13] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, cc_name#22, sum_sales#27, rn#28] + +(38) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#34] + +(39) Sort [codegen id : 11] +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#34] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, cc_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +(40) Window +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#34] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#29, i_brand#30, cc_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#29, i_brand#30, cc_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +(41) Project [codegen id : 12] +Output [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#34, rn#35] +Input [7]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#34, rn#35] + +(42) BroadcastExchange +Input [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#34, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#29, i_brand#30, cc_name#31, (rn#35 - 1)] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 13] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, sum_sales#27 AS psum#36, sum_sales#34 AS nsum#37] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27, i_category#29, i_brand#30, cc_name#31, sum_sales#34, rn#35] + +(45) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c3152d8cf9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q57.native_datafusion/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] + WholeStageCodegen (13) + Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (4) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (3) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen (2) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + WholeStageCodegen (1) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy,cc_call_center_sk,cc_name] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_category] + CometBroadcastExchange [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] #3 + CometFilter [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [cc_call_center_sk,cc_name] #5 + CometFilter [cc_call_center_sk,cc_name] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (7) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen (6) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (12) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (11) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/explain.txt new file mode 100644 index 0000000000..cfeddd402c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * HashAggregate (11) + : : : +- Exchange (10) + : : : +- * HashAggregate (9) + : : : +- * ColumnarToRow (8) + : : : +- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- BroadcastExchange (15) + : : +- * ColumnarToRow (14) + : : +- CometFilter (13) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (12) + : +- BroadcastExchange (22) + : +- * ColumnarToRow (21) + : +- CometProject (20) + : +- CometFilter (19) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (18) + +- BroadcastExchange (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * HashAggregate (26) + : : +- ReusedExchange (25) + : +- BroadcastExchange (30) + : +- * ColumnarToRow (29) + : +- CometFilter (28) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (27) + +- BroadcastExchange (37) + +- * ColumnarToRow (36) + +- CometProject (35) + +- CometFilter (34) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (33) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(5) CometBroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Right output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6], [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] + +(8) ColumnarToRow [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] + +(9) HashAggregate [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] + +(10) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(11) HashAggregate [codegen id : 8] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] + +(12) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: [s_store_sk#35, s_store_id#36, s_store_name#37] + +(13) CometFilter +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) + +(14) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] + +(15) BroadcastExchange +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#35] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 8] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] + +(18) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_month_seq#38, d_week_seq#39] +Arguments: [d_month_seq#38, d_week_seq#39] + +(19) CometFilter +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1212)) AND (d_month_seq#38 <= 1223)) AND isnotnull(d_week_seq#39)) + +(20) CometProject +Input [2]: [d_month_seq#38, d_week_seq#39] +Arguments: [d_week_seq#39], [d_week_seq#39] + +(21) ColumnarToRow [codegen id : 3] +Input [1]: [d_week_seq#39] + +(22) BroadcastExchange +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#39] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 8] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] + +(25) ReusedExchange [Reuses operator id: 10] +Output [9]: [d_week_seq#50, ss_store_sk#51, sum#52, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58] + +(26) HashAggregate [codegen id : 7] +Input [9]: [d_week_seq#50, ss_store_sk#51, sum#52, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58] +Keys [2]: [d_week_seq#50, ss_store_sk#51] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#59 = Sunday ) THEN ss_sales_price#60 END)), sum(UnscaledValue(CASE WHEN (d_day_name#59 = Monday ) THEN ss_sales_price#60 END)), sum(UnscaledValue(CASE WHEN (d_day_name#59 = Tuesday ) THEN ss_sales_price#60 END)), sum(UnscaledValue(CASE WHEN (d_day_name#59 = Wednesday) THEN ss_sales_price#60 END)), sum(UnscaledValue(CASE WHEN (d_day_name#59 = Thursday ) THEN ss_sales_price#60 END)), sum(UnscaledValue(CASE WHEN (d_day_name#59 = Friday ) THEN ss_sales_price#60 END)), sum(UnscaledValue(CASE WHEN (d_day_name#59 = Saturday ) THEN ss_sales_price#60 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#59 = Sunday ) THEN ss_sales_price#60 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#59 = Monday ) THEN ss_sales_price#60 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#59 = Tuesday ) THEN ss_sales_price#60 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#59 = Wednesday) THEN ss_sales_price#60 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#59 = Thursday ) THEN ss_sales_price#60 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#59 = Friday ) THEN ss_sales_price#60 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#59 = Saturday ) THEN ss_sales_price#60 END))#27] +Results [9]: [d_week_seq#50, ss_store_sk#51, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Sunday ) THEN ss_sales_price#60 END))#21,17,2) AS sun_sales#61, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Monday ) THEN ss_sales_price#60 END))#22,17,2) AS mon_sales#62, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Tuesday ) THEN ss_sales_price#60 END))#23,17,2) AS tue_sales#63, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Wednesday) THEN ss_sales_price#60 END))#24,17,2) AS wed_sales#64, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Thursday ) THEN ss_sales_price#60 END))#25,17,2) AS thu_sales#65, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Friday ) THEN ss_sales_price#60 END))#26,17,2) AS fri_sales#66, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#59 = Saturday ) THEN ss_sales_price#60 END))#27,17,2) AS sat_sales#67] + +(27) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#68, s_store_id#69] +Arguments: [s_store_sk#68, s_store_id#69] + +(28) CometFilter +Input [2]: [s_store_sk#68, s_store_id#69] +Condition : (isnotnull(s_store_sk#68) AND isnotnull(s_store_id#69)) + +(29) ColumnarToRow [codegen id : 5] +Input [2]: [s_store_sk#68, s_store_id#69] + +(30) BroadcastExchange +Input [2]: [s_store_sk#68, s_store_id#69] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(31) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#51] +Right keys [1]: [s_store_sk#68] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 7] +Output [9]: [d_week_seq#50, sun_sales#61, mon_sales#62, tue_sales#63, wed_sales#64, thu_sales#65, fri_sales#66, sat_sales#67, s_store_id#69] +Input [11]: [d_week_seq#50, ss_store_sk#51, sun_sales#61, mon_sales#62, tue_sales#63, wed_sales#64, thu_sales#65, fri_sales#66, sat_sales#67, s_store_sk#68, s_store_id#69] + +(33) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_month_seq#70, d_week_seq#71] +Arguments: [d_month_seq#70, d_week_seq#71] + +(34) CometFilter +Input [2]: [d_month_seq#70, d_week_seq#71] +Condition : (((isnotnull(d_month_seq#70) AND (d_month_seq#70 >= 1224)) AND (d_month_seq#70 <= 1235)) AND isnotnull(d_week_seq#71)) + +(35) CometProject +Input [2]: [d_month_seq#70, d_week_seq#71] +Arguments: [d_week_seq#71], [d_week_seq#71] + +(36) ColumnarToRow [codegen id : 6] +Input [1]: [d_week_seq#71] + +(37) BroadcastExchange +Input [1]: [d_week_seq#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(38) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [d_week_seq#50] +Right keys [1]: [d_week_seq#71] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 7] +Output [9]: [d_week_seq#50 AS d_week_seq2#72, s_store_id#69 AS s_store_id2#73, sun_sales#61 AS sun_sales2#74, mon_sales#62 AS mon_sales2#75, tue_sales#63 AS tue_sales2#76, wed_sales#64 AS wed_sales2#77, thu_sales#65 AS thu_sales2#78, fri_sales#66 AS fri_sales2#79, sat_sales#67 AS sat_sales2#80] +Input [10]: [d_week_seq#50, sun_sales#61, mon_sales#62, tue_sales#63, wed_sales#64, thu_sales#65, fri_sales#66, sat_sales#67, s_store_id#69, d_week_seq#71] + +(40) BroadcastExchange +Input [9]: [d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#73, (d_week_seq2#72 - 52)] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1#43 / sun_sales2#74) AS (sun_sales1 / sun_sales2)#81, (mon_sales1#44 / mon_sales2#75) AS (mon_sales1 / mon_sales2)#82, (tue_sales1#45 / tue_sales2#76) AS (tue_sales1 / tue_sales2)#83, (wed_sales1#46 / wed_sales2#77) AS (wed_sales1 / wed_sales2)#84, (thu_sales1#47 / thu_sales2#78) AS (thu_sales1 / thu_sales2)#85, (fri_sales1#48 / fri_sales2#79) AS (fri_sales1 / fri_sales2)#86, (sat_sales1#49 / sat_sales2#80) AS (sat_sales1 / sat_sales2)#87] +Input [19]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] + +(43) TakeOrderedAndProject +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#81, (mon_sales1 / mon_sales2)#82, (tue_sales1 / tue_sales2)#83, (wed_sales1 / wed_sales2)#84, (thu_sales1 / thu_sales2)#85, (fri_sales1 / fri_sales2)#86, (sat_sales1 / sat_sales2)#87] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#81, (mon_sales1 / mon_sales2)#82, (tue_sales1 / tue_sales2)#83, (wed_sales1 / wed_sales2)#84, (thu_sales1 / thu_sales2)#85, (fri_sales1 / fri_sales2)#86, (sat_sales1 / sat_sales2)#87] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bb9542ccb7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q59.native_datafusion/simplified.txt @@ -0,0 +1,62 @@ +TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + WholeStageCodegen (8) + Project [s_store_name1,s_store_id1,d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] + Project [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (1) + HashAggregate [d_week_seq,ss_store_sk,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + CometBroadcastHashJoin [ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_week_seq,d_day_name] + CometFilter [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_week_seq,d_day_name] #2 + CometFilter [d_date_sk,d_week_seq,d_day_name] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_store_id,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_month_seq,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_month_seq,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/explain.txt new file mode 100644 index 0000000000..7ed058c86b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/explain.txt @@ -0,0 +1,273 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * HashAggregate (49) + +- Exchange (48) + +- * HashAggregate (47) + +- Union (46) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * ColumnarToRow (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : +- CometBroadcastExchange (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + : +- CometBroadcastExchange (20) + : +- CometProject (19) + : +- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (17) + :- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * ColumnarToRow (40) + : +- CometProject (39) + : +- CometBroadcastHashJoin (38) + : :- CometProject (36) + : : +- CometBroadcastHashJoin (35) + : : :- CometProject (33) + : : : +- CometBroadcastHashJoin (32) + : : : :- CometFilter (30) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (29) + : : : +- ReusedExchange (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- * HashAggregate (45) + +- ReusedExchange (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5, d_year#6, d_moy#7] + +(4) CometFilter +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3], [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8, ca_gmt_offset#9] + +(10) CometFilter +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(11) CometProject +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Arguments: [ca_address_sk#8], [ca_address_sk#8] + +(12) CometBroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: [ca_address_sk#8] + +(13) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Right output [1]: [ca_address_sk#8] +Arguments: [ss_addr_sk#2], [ca_address_sk#8], Inner, BuildRight + +(14) CometProject +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] +Arguments: [ss_item_sk#1, ss_ext_sales_price#3], [ss_item_sk#1, ss_ext_sales_price#3] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(16) CometFilter +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(17) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_id#12, i_category#13] +Arguments: [i_item_id#12, i_category#13] + +(18) CometFilter +Input [2]: [i_item_id#12, i_category#13] +Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) + +(19) CometProject +Input [2]: [i_item_id#12, i_category#13] +Arguments: [i_item_id#12], [i_item_id#12] + +(20) CometBroadcastExchange +Input [1]: [i_item_id#12] +Arguments: [i_item_id#12] + +(21) CometBroadcastHashJoin +Left output [2]: [i_item_sk#10, i_item_id#11] +Right output [1]: [i_item_id#12] +Arguments: [i_item_id#11], [i_item_id#12], LeftSemi, BuildRight + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: [i_item_sk#10, i_item_id#11] + +(23) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Right output [2]: [i_item_sk#10, i_item_id#11] +Arguments: [ss_item_sk#1], [i_item_sk#10], Inner, BuildRight + +(24) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Arguments: [ss_ext_sales_price#3, i_item_id#11], [ss_ext_sales_price#3, i_item_id#11] + +(25) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] + +(26) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_item_id#11, sum#15] + +(27) Exchange +Input [2]: [i_item_id#11, sum#15] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(28) HashAggregate [codegen id : 2] +Input [2]: [i_item_id#11, sum#15] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(29) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Arguments: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(30) CometFilter +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(31) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#22] + +(32) CometBroadcastHashJoin +Left output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Right output [1]: [d_date_sk#22] +Arguments: [cs_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + +(33) CometProject +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] +Arguments: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20], [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] + +(34) ReusedExchange [Reuses operator id: 12] +Output [1]: [ca_address_sk#23] + +(35) CometBroadcastHashJoin +Left output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Right output [1]: [ca_address_sk#23] +Arguments: [cs_bill_addr_sk#18], [ca_address_sk#23], Inner, BuildRight + +(36) CometProject +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] +Arguments: [cs_item_sk#19, cs_ext_sales_price#20], [cs_item_sk#19, cs_ext_sales_price#20] + +(37) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#24, i_item_id#25] + +(38) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Right output [2]: [i_item_sk#24, i_item_id#25] +Arguments: [cs_item_sk#19], [i_item_sk#24], Inner, BuildRight + +(39) CometProject +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_item_id#25] +Arguments: [cs_ext_sales_price#20, i_item_id#25], [cs_ext_sales_price#20, i_item_id#25] + +(40) ColumnarToRow [codegen id : 3] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] + +(41) HashAggregate [codegen id : 3] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] +Keys [1]: [i_item_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_item_id#25, sum#27] + +(42) Exchange +Input [2]: [i_item_id#25, sum#27] +Arguments: hashpartitioning(i_item_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(43) HashAggregate [codegen id : 4] +Input [2]: [i_item_id#25, sum#27] +Keys [1]: [i_item_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(44) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_id#30, sum#31] + +(45) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#30, sum#31] +Keys [1]: [i_item_id#30] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#33] +Results [2]: [i_item_id#30, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#33,17,2) AS total_sales#34] + +(46) Union + +(47) HashAggregate [codegen id : 7] +Input [2]: [i_item_id#11, total_sales#17] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [3]: [i_item_id#11, sum#37, isEmpty#38] + +(48) Exchange +Input [3]: [i_item_id#11, sum#37, isEmpty#38] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(49) HashAggregate [codegen id : 8] +Input [3]: [i_item_id#11, sum#37, isEmpty#38] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#39] +Results [2]: [i_item_id#11, sum(total_sales#17)#39 AS total_sales#40] + +(50) TakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#40] +Arguments: 100, [i_item_id#11 ASC NULLS FIRST, total_sales#40 ASC NULLS FIRST], [i_item_id#11, total_sales#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/simplified.txt new file mode 100644 index 0000000000..00efd9736a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q60.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (8) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (2) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (1) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ca_address_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [ca_address_sk] #4 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_id] + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [i_item_id] #6 + CometProject [i_item_id] + CometFilter [i_item_id,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_id,i_category] + WholeStageCodegen (4) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (3) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_ext_sales_price,i_item_id] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,ca_address_sk] + CometProject [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + CometBroadcastHashJoin [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + ReusedExchange [ca_address_sk] #4 + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + ReusedExchange [i_item_id,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/explain.txt new file mode 100644 index 0000000000..396c7ee90d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/explain.txt @@ -0,0 +1,328 @@ +== Physical Plan == +* Project (62) ++- * BroadcastNestedLoopJoin Inner BuildRight (61) + :- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * ColumnarToRow (35) + : +- CometProject (34) + : +- CometBroadcastHashJoin (33) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometProject (25) + : : : +- CometBroadcastHashJoin (24) + : : : :- CometProject (20) + : : : : +- CometBroadcastHashJoin (19) + : : : : :- CometProject (14) + : : : : : +- CometBroadcastHashJoin (13) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometBroadcastExchange (6) + : : : : : : +- CometProject (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (3) + : : : : : +- CometBroadcastExchange (12) + : : : : : +- CometProject (11) + : : : : : +- CometFilter (10) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (9) + : : : : +- CometBroadcastExchange (18) + : : : : +- CometProject (17) + : : : : +- CometFilter (16) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (15) + : : : +- CometBroadcastExchange (23) + : : : +- CometFilter (22) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (21) + : : +- ReusedExchange (26) + : +- CometBroadcastExchange (32) + : +- CometProject (31) + : +- CometFilter (30) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (29) + +- BroadcastExchange (60) + +- * HashAggregate (59) + +- Exchange (58) + +- * HashAggregate (57) + +- * ColumnarToRow (56) + +- CometProject (55) + +- CometBroadcastHashJoin (54) + :- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometProject (49) + : : +- CometBroadcastHashJoin (48) + : : :- CometProject (46) + : : : +- CometBroadcastHashJoin (45) + : : : :- CometProject (43) + : : : : +- CometBroadcastHashJoin (42) + : : : : :- CometFilter (40) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (39) + : : : : +- ReusedExchange (41) + : : : +- ReusedExchange (44) + : : +- ReusedExchange (47) + : +- ReusedExchange (50) + +- ReusedExchange (53) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_store_sk#3) AND isnotnull(ss_promo_sk#4)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Arguments: [s_store_sk#7, s_gmt_offset#8] + +(4) CometFilter +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(5) CometProject +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Arguments: [s_store_sk#7], [s_store_sk#7] + +(6) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(7) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#3], [s_store_sk#7], Inner, BuildRight + +(8) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, s_store_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6], [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(9) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Arguments: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] + +(10) CometFilter +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Condition : ((((p_channel_dmail#10 = Y) OR (p_channel_email#11 = Y)) OR (p_channel_tv#12 = Y)) AND isnotnull(p_promo_sk#9)) + +(11) CometProject +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Arguments: [p_promo_sk#9], [p_promo_sk#9] + +(12) CometBroadcastExchange +Input [1]: [p_promo_sk#9] +Arguments: [p_promo_sk#9] + +(13) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [p_promo_sk#9] +Arguments: [ss_promo_sk#4], [p_promo_sk#9], Inner, BuildRight + +(14) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6], [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(15) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +Arguments: [d_date_sk#13, d_year#14, d_moy#15] + +(16) CometFilter +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((((isnotnull(d_year#14) AND isnotnull(d_moy#15)) AND (d_year#14 = 1998)) AND (d_moy#15 = 11)) AND isnotnull(d_date_sk#13)) + +(17) CometProject +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(18) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(19) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#6], [d_date_sk#13], Inner, BuildRight + +(20) CometProject +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5], [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] + +(21) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [c_customer_sk#16, c_current_addr_sk#17] + +(22) CometFilter +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_current_addr_sk#17)) + +(23) CometBroadcastExchange +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [c_customer_sk#16, c_current_addr_sk#17] + +(24) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] +Right output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [ss_customer_sk#2], [c_customer_sk#16], Inner, BuildRight + +(25) CometProject +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#16, c_current_addr_sk#17] +Arguments: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17], [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] + +(26) ReusedExchange [Reuses operator id: 6] +Output [1]: [ca_address_sk#18] + +(27) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] +Right output [1]: [ca_address_sk#18] +Arguments: [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + +(28) CometProject +Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17, ca_address_sk#18] +Arguments: [ss_item_sk#1, ss_ext_sales_price#5], [ss_item_sk#1, ss_ext_sales_price#5] + +(29) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#19, i_category#20] +Arguments: [i_item_sk#19, i_category#20] + +(30) CometFilter +Input [2]: [i_item_sk#19, i_category#20] +Condition : ((isnotnull(i_category#20) AND (i_category#20 = Jewelry )) AND isnotnull(i_item_sk#19)) + +(31) CometProject +Input [2]: [i_item_sk#19, i_category#20] +Arguments: [i_item_sk#19], [i_item_sk#19] + +(32) CometBroadcastExchange +Input [1]: [i_item_sk#19] +Arguments: [i_item_sk#19] + +(33) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#1, ss_ext_sales_price#5] +Right output [1]: [i_item_sk#19] +Arguments: [ss_item_sk#1], [i_item_sk#19], Inner, BuildRight + +(34) CometProject +Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#19] +Arguments: [ss_ext_sales_price#5], [ss_ext_sales_price#5] + +(35) ColumnarToRow [codegen id : 1] +Input [1]: [ss_ext_sales_price#5] + +(36) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_sales_price#5] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#21] +Results [1]: [sum#22] + +(37) Exchange +Input [1]: [sum#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] + +(38) HashAggregate [codegen id : 4] +Input [1]: [sum#22] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#23] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#23,17,2) AS promotions#24] + +(39) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Arguments: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] + +(40) CometFilter +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Condition : ((isnotnull(ss_store_sk#27) AND isnotnull(ss_customer_sk#26)) AND isnotnull(ss_item_sk#25)) + +(41) ReusedExchange [Reuses operator id: 6] +Output [1]: [s_store_sk#30] + +(42) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Right output [1]: [s_store_sk#30] +Arguments: [ss_store_sk#27], [s_store_sk#30], Inner, BuildRight + +(43) CometProject +Input [6]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29, s_store_sk#30] +Arguments: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, ss_sold_date_sk#29], [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, ss_sold_date_sk#29] + +(44) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#31] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, ss_sold_date_sk#29] +Right output [1]: [d_date_sk#31] +Arguments: [ss_sold_date_sk#29], [d_date_sk#31], Inner, BuildRight + +(46) CometProject +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, ss_sold_date_sk#29, d_date_sk#31] +Arguments: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28], [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28] + +(47) ReusedExchange [Reuses operator id: 23] +Output [2]: [c_customer_sk#32, c_current_addr_sk#33] + +(48) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28] +Right output [2]: [c_customer_sk#32, c_current_addr_sk#33] +Arguments: [ss_customer_sk#26], [c_customer_sk#32], Inner, BuildRight + +(49) CometProject +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, c_customer_sk#32, c_current_addr_sk#33] +Arguments: [ss_item_sk#25, ss_ext_sales_price#28, c_current_addr_sk#33], [ss_item_sk#25, ss_ext_sales_price#28, c_current_addr_sk#33] + +(50) ReusedExchange [Reuses operator id: 6] +Output [1]: [ca_address_sk#34] + +(51) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#25, ss_ext_sales_price#28, c_current_addr_sk#33] +Right output [1]: [ca_address_sk#34] +Arguments: [c_current_addr_sk#33], [ca_address_sk#34], Inner, BuildRight + +(52) CometProject +Input [4]: [ss_item_sk#25, ss_ext_sales_price#28, c_current_addr_sk#33, ca_address_sk#34] +Arguments: [ss_item_sk#25, ss_ext_sales_price#28], [ss_item_sk#25, ss_ext_sales_price#28] + +(53) ReusedExchange [Reuses operator id: 32] +Output [1]: [i_item_sk#35] + +(54) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#25, ss_ext_sales_price#28] +Right output [1]: [i_item_sk#35] +Arguments: [ss_item_sk#25], [i_item_sk#35], Inner, BuildRight + +(55) CometProject +Input [3]: [ss_item_sk#25, ss_ext_sales_price#28, i_item_sk#35] +Arguments: [ss_ext_sales_price#28], [ss_ext_sales_price#28] + +(56) ColumnarToRow [codegen id : 2] +Input [1]: [ss_ext_sales_price#28] + +(57) HashAggregate [codegen id : 2] +Input [1]: [ss_ext_sales_price#28] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#28))] +Aggregate Attributes [1]: [sum#36] +Results [1]: [sum#37] + +(58) Exchange +Input [1]: [sum#37] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(59) HashAggregate [codegen id : 3] +Input [1]: [sum#37] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#28))#38] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#28))#38,17,2) AS total#39] + +(60) BroadcastExchange +Input [1]: [total#39] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(61) BroadcastNestedLoopJoin [codegen id : 4] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 4] +Output [3]: [promotions#24, total#39, ((cast(promotions#24 as decimal(15,4)) / cast(total#39 as decimal(15,4))) * 100) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40] +Input [2]: [promotions#24, total#39] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/simplified.txt new file mode 100644 index 0000000000..158c3657ff --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q61.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (4) + Project [promotions,total] + BroadcastNestedLoopJoin + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,c_current_addr_sk,ca_address_sk] + CometProject [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,c_customer_sk,c_current_addr_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk,p_promo_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk,s_store_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk] #2 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_gmt_offset] + CometBroadcastExchange [p_promo_sk] #3 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #5 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + ReusedExchange [ca_address_sk] #2 + CometBroadcastExchange [i_item_sk] #6 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (2) + HashAggregate [ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,i_item_sk] + CometProject [ss_item_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,c_current_addr_sk,ca_address_sk] + CometProject [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,c_customer_sk,c_current_addr_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,s_store_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [s_store_sk] #2 + ReusedExchange [d_date_sk] #4 + ReusedExchange [c_customer_sk,c_current_addr_sk] #5 + ReusedExchange [ca_address_sk] #2 + ReusedExchange [i_item_sk] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/explain.txt new file mode 100644 index 0000000000..a2042e5626 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/explain.txt @@ -0,0 +1,130 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * ColumnarToRow (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : : +- ReusedExchange (8) + : +- ReusedExchange (11) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_ship_mode_sk#3)) AND isnotnull(ws_web_site_sk#2)) AND isnotnull(ws_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(4) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [ws_warehouse_sk#4], [w_warehouse_sk#6], Inner, BuildRight + +(7) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7], [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] + +(8) ReusedExchange [Reuses operator id: 5] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] + +(9) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] +Right output [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [ws_ship_mode_sk#3], [sm_ship_mode_sk#8], Inner, BuildRight + +(10) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] +Arguments: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9], [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] + +(11) ReusedExchange [Reuses operator id: 5] +Output [2]: [web_site_sk#10, web_name#11] + +(12) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Right output [2]: [web_site_sk#10, web_name#11] +Arguments: [ws_web_site_sk#2], [web_site_sk#10], Inner, BuildRight + +(13) CometProject +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_site_sk#10, web_name#11] +Arguments: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11], [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12, d_month_seq#13] + +(15) CometFilter +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(16) CometProject +Input [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(18) CometBroadcastHashJoin +Left output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] +Right output [1]: [d_date_sk#12] +Arguments: [ws_ship_date_sk#1], [d_date_sk#12], Inner, BuildRight + +(19) CometProject +Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11, d_date_sk#12] +Arguments: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14], [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] + +(20) ColumnarToRow [codegen id : 1] +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] + +(21) HashAggregate [codegen id : 1] +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] + +(22) Exchange +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, web_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(23) HashAggregate [codegen id : 2] +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] + +(24) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, web_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0f5b318b16 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q62.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (2) + HashAggregate [_groupingexpression,sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [_groupingexpression,sm_type,web_name] #1 + WholeStageCodegen (1) + HashAggregate [_groupingexpression,sm_type,web_name,ws_ship_date_sk,ws_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [w_warehouse_name] [ws_ship_date_sk,ws_sold_date_sk,sm_type,web_name,_groupingexpression] + CometBroadcastHashJoin [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name,d_date_sk] + CometProject [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_site_sk,web_name] + CometProject [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name,sm_ship_mode_sk,sm_type] + CometProject [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name] + CometBroadcastHashJoin [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [sm_ship_mode_sk,sm_type] #2 + ReusedExchange [web_site_sk,web_name] #2 + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/explain.txt new file mode 100644 index 0000000000..4ee8154ba1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/explain.txt @@ -0,0 +1,156 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * Project (28) + +- * Filter (27) + +- Window (26) + +- * Sort (25) + +- Exchange (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * ColumnarToRow (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (14) + : +- CometBroadcastHashJoin (13) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : +- CometBroadcastExchange (12) + : +- CometProject (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + +- CometBroadcastExchange (17) + +- CometFilter (16) + +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Arguments: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,refernece ,self-help )) AND i_brand#2 IN (scholaramalgamalg #6 ,scholaramalgamalg #7 ,exportiunivamalg #8 ,scholaramalgamalg #8 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Arguments: [i_item_sk#1, i_manager_id#5], [i_item_sk#1, i_manager_id#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(5) CometFilter +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(6) CometBroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) CometBroadcastHashJoin +Left output [2]: [i_item_sk#1, i_manager_id#5] +Right output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_item_sk#1], [ss_item_sk#10], Inner, BuildRight + +(8) CometProject +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13], [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Arguments: [d_date_sk#14, d_month_seq#15, d_moy#16] + +(10) CometFilter +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(11) CometProject +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Arguments: [d_date_sk#14, d_moy#16], [d_date_sk#14, d_moy#16] + +(12) CometBroadcastExchange +Input [2]: [d_date_sk#14, d_moy#16] +Arguments: [d_date_sk#14, d_moy#16] + +(13) CometBroadcastHashJoin +Left output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Right output [2]: [d_date_sk#14, d_moy#16] +Arguments: [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(14) CometProject +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_moy#16] +Arguments: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16], [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(16) CometFilter +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: [s_store_sk#17] + +(18) CometBroadcastHashJoin +Left output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Right output [1]: [s_store_sk#17] +Arguments: [ss_store_sk#11], [s_store_sk#17], Inner, BuildRight + +(19) CometProject +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] +Arguments: [i_manager_id#5, ss_sales_price#12, d_moy#16], [i_manager_id#5, ss_sales_price#12, d_moy#16] + +(20) ColumnarToRow [codegen id : 1] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] + +(21) HashAggregate [codegen id : 1] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#16, sum#19] + +(22) Exchange +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(23) HashAggregate [codegen id : 2] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] + +(24) Exchange +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(25) Sort [codegen id : 3] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(26) Window +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] + +(27) Filter [codegen id : 4] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN ((abs((sum_sales#21 - avg_monthly_sales#23)) / avg_monthly_sales#23) > 0.1000000000000000) ELSE false END + +(28) Project [codegen id : 4] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] + +(29) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/simplified.txt new file mode 100644 index 0000000000..eca9e8c6ee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q63.native_datafusion/simplified.txt @@ -0,0 +1,37 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen (4) + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (3) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (2) + HashAggregate [i_manager_id,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manager_id,d_moy] #2 + WholeStageCodegen (1) + HashAggregate [i_manager_id,d_moy,ss_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [i_manager_id,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_manager_id,ss_store_sk,ss_sales_price,d_moy,s_store_sk] + CometProject [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_moy] + CometProject [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_manager_id,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometProject [i_item_sk,i_manager_id] + CometFilter [i_item_sk,i_brand,i_class,i_category,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_manager_id] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_moy] #4 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_month_seq,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_moy] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/explain.txt new file mode 100644 index 0000000000..b9f587226f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/explain.txt @@ -0,0 +1,936 @@ +== Physical Plan == +* ColumnarToRow (176) ++- CometSort (175) + +- CometColumnarExchange (174) + +- * Project (173) + +- * SortMergeJoin Inner (172) + :- * Sort (109) + : +- Exchange (108) + : +- * HashAggregate (107) + : +- * HashAggregate (106) + : +- * Project (105) + : +- * BroadcastHashJoin Inner BuildRight (104) + : :- * Project (98) + : : +- * BroadcastHashJoin Inner BuildRight (97) + : : :- * Project (95) + : : : +- * BroadcastHashJoin Inner BuildRight (94) + : : : :- * Project (92) + : : : : +- * BroadcastHashJoin Inner BuildRight (91) + : : : : :- * Project (89) + : : : : : +- * BroadcastHashJoin Inner BuildRight (88) + : : : : : :- * Project (83) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (82) + : : : : : : :- * Project (80) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : : : : : :- * Project (74) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (73) + : : : : : : : : :- * Project (68) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (67) + : : : : : : : : : :- * Project (65) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : : : : : : : : :- * Project (59) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : : : : : : : :- * Project (56) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : : : : : : : : : : :- * Project (50) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : : : : : : : : : : :- * Project (44) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : : : : : : : : : : : :- * Project (38) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : : : : : : : : :- * Project (32) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (31) + : : : : : : : : : : : : : : : : :- * ColumnarToRow (11) + : : : : : : : : : : : : : : : : : +- CometSort (10) + : : : : : : : : : : : : : : : : : +- CometExchange (9) + : : : : : : : : : : : : : : : : : +- CometProject (8) + : : : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : : : : : : : : : : : : :- CometBroadcastExchange (3) + : : : : : : : : : : : : : : : : : : +- CometFilter (2) + : : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : : : : : : : : : : : : +- CometProject (6) + : : : : : : : : : : : : : : : : : +- CometFilter (5) + : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (4) + : : : : : : : : : : : : : : : : +- * Sort (30) + : : : : : : : : : : : : : : : : +- * Project (29) + : : : : : : : : : : : : : : : : +- * Filter (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- Exchange (26) + : : : : : : : : : : : : : : : : +- * HashAggregate (25) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (24) + : : : : : : : : : : : : : : : : +- CometProject (23) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (22) + : : : : : : : : : : : : : : : : :- CometSort (16) + : : : : : : : : : : : : : : : : : +- CometExchange (15) + : : : : : : : : : : : : : : : : : +- CometProject (14) + : : : : : : : : : : : : : : : : : +- CometFilter (13) + : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (12) + : : : : : : : : : : : : : : : : +- CometSort (21) + : : : : : : : : : : : : : : : : +- CometExchange (20) + : : : : : : : : : : : : : : : : +- CometProject (19) + : : : : : : : : : : : : : : : : +- CometFilter (18) + : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (17) + : : : : : : : : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : : : : : : : : +- * ColumnarToRow (35) + : : : : : : : : : : : : : : : +- CometFilter (34) + : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (33) + : : : : : : : : : : : : : : +- BroadcastExchange (42) + : : : : : : : : : : : : : : +- * ColumnarToRow (41) + : : : : : : : : : : : : : : +- CometFilter (40) + : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (39) + : : : : : : : : : : : : : +- BroadcastExchange (48) + : : : : : : : : : : : : : +- * ColumnarToRow (47) + : : : : : : : : : : : : : +- CometFilter (46) + : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (45) + : : : : : : : : : : : : +- BroadcastExchange (54) + : : : : : : : : : : : : +- * ColumnarToRow (53) + : : : : : : : : : : : : +- CometFilter (52) + : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (51) + : : : : : : : : : : : +- ReusedExchange (57) + : : : : : : : : : : +- BroadcastExchange (63) + : : : : : : : : : : +- * ColumnarToRow (62) + : : : : : : : : : : +- CometFilter (61) + : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (60) + : : : : : : : : : +- ReusedExchange (66) + : : : : : : : : +- BroadcastExchange (72) + : : : : : : : : +- * ColumnarToRow (71) + : : : : : : : : +- CometFilter (70) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (69) + : : : : : : : +- BroadcastExchange (78) + : : : : : : : +- * ColumnarToRow (77) + : : : : : : : +- CometFilter (76) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (75) + : : : : : : +- ReusedExchange (81) + : : : : : +- BroadcastExchange (87) + : : : : : +- * ColumnarToRow (86) + : : : : : +- CometFilter (85) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (84) + : : : : +- ReusedExchange (90) + : : : +- ReusedExchange (93) + : : +- ReusedExchange (96) + : +- BroadcastExchange (103) + : +- * ColumnarToRow (102) + : +- CometProject (101) + : +- CometFilter (100) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (99) + +- * Sort (171) + +- Exchange (170) + +- * HashAggregate (169) + +- * HashAggregate (168) + +- * Project (167) + +- * BroadcastHashJoin Inner BuildRight (166) + :- * Project (164) + : +- * BroadcastHashJoin Inner BuildRight (163) + : :- * Project (161) + : : +- * BroadcastHashJoin Inner BuildRight (160) + : : :- * Project (158) + : : : +- * BroadcastHashJoin Inner BuildRight (157) + : : : :- * Project (155) + : : : : +- * BroadcastHashJoin Inner BuildRight (154) + : : : : :- * Project (152) + : : : : : +- * BroadcastHashJoin Inner BuildRight (151) + : : : : : :- * Project (149) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (148) + : : : : : : :- * Project (146) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (145) + : : : : : : : :- * Project (143) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (142) + : : : : : : : : :- * Project (140) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (139) + : : : : : : : : : :- * Project (137) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (136) + : : : : : : : : : : :- * Project (134) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (133) + : : : : : : : : : : : :- * Project (131) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (130) + : : : : : : : : : : : : :- * Project (128) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (127) + : : : : : : : : : : : : : :- * Project (125) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (124) + : : : : : : : : : : : : : : :- * Project (119) + : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (118) + : : : : : : : : : : : : : : : :- * ColumnarToRow (112) + : : : : : : : : : : : : : : : : +- CometSort (111) + : : : : : : : : : : : : : : : : +- ReusedExchange (110) + : : : : : : : : : : : : : : : +- * Sort (117) + : : : : : : : : : : : : : : : +- * Project (116) + : : : : : : : : : : : : : : : +- * Filter (115) + : : : : : : : : : : : : : : : +- * HashAggregate (114) + : : : : : : : : : : : : : : : +- ReusedExchange (113) + : : : : : : : : : : : : : : +- BroadcastExchange (123) + : : : : : : : : : : : : : : +- * ColumnarToRow (122) + : : : : : : : : : : : : : : +- CometFilter (121) + : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (120) + : : : : : : : : : : : : : +- ReusedExchange (126) + : : : : : : : : : : : : +- ReusedExchange (129) + : : : : : : : : : : : +- ReusedExchange (132) + : : : : : : : : : : +- ReusedExchange (135) + : : : : : : : : : +- ReusedExchange (138) + : : : : : : : : +- ReusedExchange (141) + : : : : : : : +- ReusedExchange (144) + : : : : : : +- ReusedExchange (147) + : : : : : +- ReusedExchange (150) + : : : : +- ReusedExchange (153) + : : : +- ReusedExchange (156) + : : +- ReusedExchange (159) + : +- ReusedExchange (162) + +- ReusedExchange (165) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(2) CometFilter +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(3) CometBroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(5) CometFilter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(6) CometProject +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14], [sr_item_sk#13, sr_ticket_number#14] + +(7) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_ticket_number#8], [sr_item_sk#13, sr_ticket_number#14], Inner, BuildLeft + +(8) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(9) CometExchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometSort +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1 ASC NULLS FIRST] + +(11) ColumnarToRow [codegen id : 1] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(12) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(13) CometFilter +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(14) CometProject +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(15) CometExchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(16) CometSort +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST] + +(17) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(18) CometFilter +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(19) CometProject +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(20) CometExchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(21) CometSort +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST] + +(22) CometSortMergeJoin +Left output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Right output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_order_number#17], [cr_item_sk#20, cr_order_number#21], Inner + +(23) CometProject +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(24) ColumnarToRow [codegen id : 2] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(25) HashAggregate [codegen id : 2] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [3]: [sum#26, sum#27, isEmpty#28] +Results [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] + +(26) Exchange +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 3] +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sale#34, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33 AS refund#35] + +(28) Filter [codegen id : 3] +Input [3]: [cs_item_sk#16, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(29) Project [codegen id : 3] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sale#34, refund#35] + +(30) Sort [codegen id : 3] +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16 ASC NULLS FIRST], false, 0 + +(31) SortMergeJoin [codegen id : 19] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [cs_item_sk#16] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 19] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] + +(33) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#36, d_year#37] +Arguments: [d_date_sk#36, d_year#37] + +(34) CometFilter +Input [2]: [d_date_sk#36, d_year#37] +Condition : ((isnotnull(d_year#37) AND (d_year#37 = 1999)) AND isnotnull(d_date_sk#36)) + +(35) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#36, d_year#37] + +(36) BroadcastExchange +Input [2]: [d_date_sk#36, d_year#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 19] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#36, d_year#37] + +(39) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Arguments: [s_store_sk#38, s_store_name#39, s_zip#40] + +(40) CometFilter +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Condition : ((isnotnull(s_store_sk#38) AND isnotnull(s_store_name#39)) AND isnotnull(s_zip#40)) + +(41) ColumnarToRow [codegen id : 5] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] + +(42) BroadcastExchange +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 19] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_sk#38, s_store_name#39, s_zip#40] + +(45) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Arguments: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(46) CometFilter +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Condition : (((((isnotnull(c_customer_sk#41) AND isnotnull(c_first_sales_date_sk#46)) AND isnotnull(c_first_shipto_date_sk#45)) AND isnotnull(c_current_cdemo_sk#42)) AND isnotnull(c_current_hdemo_sk#43)) AND isnotnull(c_current_addr_sk#44)) + +(47) ColumnarToRow [codegen id : 6] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(48) BroadcastExchange +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(49) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#41] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 19] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(51) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#47, d_year#48] +Arguments: [d_date_sk#47, d_year#48] + +(52) CometFilter +Input [2]: [d_date_sk#47, d_year#48] +Condition : isnotnull(d_date_sk#47) + +(53) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#47, d_year#48] + +(54) BroadcastExchange +Input [2]: [d_date_sk#47, d_year#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_first_sales_date_sk#46] +Right keys [1]: [d_date_sk#47] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 19] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46, d_date_sk#47, d_year#48] + +(57) ReusedExchange [Reuses operator id: 54] +Output [2]: [d_date_sk#49, d_year#50] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_first_shipto_date_sk#45] +Right keys [1]: [d_date_sk#49] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 19] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48, d_date_sk#49, d_year#50] + +(60) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [2]: [cd_demo_sk#51, cd_marital_status#52] +Arguments: [cd_demo_sk#51, cd_marital_status#52] + +(61) CometFilter +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Condition : (isnotnull(cd_demo_sk#51) AND isnotnull(cd_marital_status#52)) + +(62) ColumnarToRow [codegen id : 9] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] + +(63) BroadcastExchange +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(64) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#51] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 19] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_demo_sk#51, cd_marital_status#52] + +(66) ReusedExchange [Reuses operator id: 63] +Output [2]: [cd_demo_sk#53, cd_marital_status#54] + +(67) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_current_cdemo_sk#42] +Right keys [1]: [cd_demo_sk#53] +Join type: Inner +Join condition: NOT (cd_marital_status#52 = cd_marital_status#54) + +(68) Project [codegen id : 19] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52, cd_demo_sk#53, cd_marital_status#54] + +(69) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [1]: [p_promo_sk#55] +Arguments: [p_promo_sk#55] + +(70) CometFilter +Input [1]: [p_promo_sk#55] +Condition : isnotnull(p_promo_sk#55) + +(71) ColumnarToRow [codegen id : 11] +Input [1]: [p_promo_sk#55] + +(72) BroadcastExchange +Input [1]: [p_promo_sk#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(73) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_promo_sk#7] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(74) Project [codegen id : 19] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, p_promo_sk#55] + +(75) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Arguments: [hd_demo_sk#56, hd_income_band_sk#57] + +(76) CometFilter +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Condition : (isnotnull(hd_demo_sk#56) AND isnotnull(hd_income_band_sk#57)) + +(77) ColumnarToRow [codegen id : 12] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] + +(78) BroadcastExchange +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=11] + +(79) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#56] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 19] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_demo_sk#56, hd_income_band_sk#57] + +(81) ReusedExchange [Reuses operator id: 78] +Output [2]: [hd_demo_sk#58, hd_income_band_sk#59] + +(82) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_current_hdemo_sk#43] +Right keys [1]: [hd_demo_sk#58] +Join type: Inner +Join condition: None + +(83) Project [codegen id : 19] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_demo_sk#58, hd_income_band_sk#59] + +(84) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(85) CometFilter +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Condition : isnotnull(ca_address_sk#60) + +(86) ColumnarToRow [codegen id : 14] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(87) BroadcastExchange +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(88) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_addr_sk#5] +Right keys [1]: [ca_address_sk#60] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 19] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(90) ReusedExchange [Reuses operator id: 87] +Output [5]: [ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(91) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [c_current_addr_sk#44] +Right keys [1]: [ca_address_sk#65] +Join type: Inner +Join condition: None + +(92) Project [codegen id : 19] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(93) ReusedExchange [Reuses operator id: 72] +Output [1]: [ib_income_band_sk#70] + +(94) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [hd_income_band_sk#57] +Right keys [1]: [ib_income_band_sk#70] +Join type: Inner +Join condition: None + +(95) Project [codegen id : 19] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#70] + +(96) ReusedExchange [Reuses operator id: 72] +Output [1]: [ib_income_band_sk#71] + +(97) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [hd_income_band_sk#59] +Right keys [1]: [ib_income_band_sk#71] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 19] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#71] + +(99) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Arguments: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(100) CometFilter +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Condition : ((((((isnotnull(i_current_price#73) AND i_color#74 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#73 >= 64.00)) AND (i_current_price#73 <= 74.00)) AND (i_current_price#73 >= 65.00)) AND (i_current_price#73 <= 79.00)) AND isnotnull(i_item_sk#72)) + +(101) CometProject +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Arguments: [i_item_sk#72, i_product_name#75], [i_item_sk#72, i_product_name#75] + +(102) ColumnarToRow [codegen id : 18] +Input [2]: [i_item_sk#72, i_product_name#75] + +(103) BroadcastExchange +Input [2]: [i_item_sk#72, i_product_name#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] + +(104) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(105) Project [codegen id : 19] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] + +(106) HashAggregate [codegen id : 19] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count#76, sum#77, sum#78, sum#79] +Results [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] + +(107) HashAggregate [codegen id : 19] +Input [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#9))#85, sum(UnscaledValue(ss_list_price#10))#86, sum(UnscaledValue(ss_coupon_amt#11))#87] +Results [17]: [i_product_name#75 AS product_name#88, i_item_sk#72 AS item_sk#89, s_store_name#39 AS store_name#90, s_zip#40 AS store_zip#91, ca_street_number#61 AS b_street_number#92, ca_street_name#62 AS b_streen_name#93, ca_city#63 AS b_city#94, ca_zip#64 AS b_zip#95, ca_street_number#66 AS c_street_number#96, ca_street_name#67 AS c_street_name#97, ca_city#68 AS c_city#98, ca_zip#69 AS c_zip#99, d_year#37 AS syear#100, count(1)#84 AS cnt#101, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#85,17,2) AS s1#102, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#86,17,2) AS s2#103, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#87,17,2) AS s3#104] + +(108) Exchange +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: hashpartitioning(item_sk#89, store_name#90, store_zip#91, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(109) Sort [codegen id : 20] +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: [item_sk#89 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, store_zip#91 ASC NULLS FIRST], false, 0 + +(110) ReusedExchange [Reuses operator id: 9] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] + +(111) CometSort +Input [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Arguments: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115], [ss_item_sk#105 ASC NULLS FIRST] + +(112) ColumnarToRow [codegen id : 21] +Input [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] + +(113) ReusedExchange [Reuses operator id: 26] +Output [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] + +(114) HashAggregate [codegen id : 23] +Input [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] +Keys [1]: [cs_item_sk#116] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#120)), sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#120))#32, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33] +Results [3]: [cs_item_sk#116, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#120))#32,17,2) AS sale#124, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33 AS refund#125] + +(115) Filter [codegen id : 23] +Input [3]: [cs_item_sk#116, sale#124, refund#125] +Condition : ((isnotnull(sale#124) AND isnotnull(refund#125)) AND (cast(sale#124 as decimal(21,2)) > (2 * refund#125))) + +(116) Project [codegen id : 23] +Output [1]: [cs_item_sk#116] +Input [3]: [cs_item_sk#116, sale#124, refund#125] + +(117) Sort [codegen id : 23] +Input [1]: [cs_item_sk#116] +Arguments: [cs_item_sk#116 ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin [codegen id : 39] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [cs_item_sk#116] +Join type: Inner +Join condition: None + +(119) Project [codegen id : 39] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Input [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, cs_item_sk#116] + +(120) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#126, d_year#127] +Arguments: [d_date_sk#126, d_year#127] + +(121) CometFilter +Input [2]: [d_date_sk#126, d_year#127] +Condition : ((isnotnull(d_year#127) AND (d_year#127 = 2000)) AND isnotnull(d_date_sk#126)) + +(122) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#126, d_year#127] + +(123) BroadcastExchange +Input [2]: [d_date_sk#126, d_year#127] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] + +(124) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#126] +Join type: Inner +Join condition: None + +(125) Project [codegen id : 39] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127] +Input [13]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, d_date_sk#126, d_year#127] + +(126) ReusedExchange [Reuses operator id: 42] +Output [3]: [s_store_sk#128, s_store_name#129, s_zip#130] + +(127) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_store_sk#110] +Right keys [1]: [s_store_sk#128] +Join type: Inner +Join condition: None + +(128) Project [codegen id : 39] +Output [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130] +Input [14]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_sk#128, s_store_name#129, s_zip#130] + +(129) ReusedExchange [Reuses operator id: 48] +Output [6]: [c_customer_sk#131, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, c_first_shipto_date_sk#135, c_first_sales_date_sk#136] + +(130) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_customer_sk#106] +Right keys [1]: [c_customer_sk#131] +Join type: Inner +Join condition: None + +(131) Project [codegen id : 39] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, c_first_shipto_date_sk#135, c_first_sales_date_sk#136] +Input [18]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_customer_sk#131, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, c_first_shipto_date_sk#135, c_first_sales_date_sk#136] + +(132) ReusedExchange [Reuses operator id: 54] +Output [2]: [d_date_sk#137, d_year#138] + +(133) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_first_sales_date_sk#136] +Right keys [1]: [d_date_sk#137] +Join type: Inner +Join condition: None + +(134) Project [codegen id : 39] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, c_first_shipto_date_sk#135, d_year#138] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, c_first_shipto_date_sk#135, c_first_sales_date_sk#136, d_date_sk#137, d_year#138] + +(135) ReusedExchange [Reuses operator id: 54] +Output [2]: [d_date_sk#139, d_year#140] + +(136) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_first_shipto_date_sk#135] +Right keys [1]: [d_date_sk#139] +Join type: Inner +Join condition: None + +(137) Project [codegen id : 39] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, c_first_shipto_date_sk#135, d_year#138, d_date_sk#139, d_year#140] + +(138) ReusedExchange [Reuses operator id: 63] +Output [2]: [cd_demo_sk#141, cd_marital_status#142] + +(139) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_cdemo_sk#107] +Right keys [1]: [cd_demo_sk#141] +Join type: Inner +Join condition: None + +(140) Project [codegen id : 39] +Output [16]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, cd_marital_status#142] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, cd_demo_sk#141, cd_marital_status#142] + +(141) ReusedExchange [Reuses operator id: 63] +Output [2]: [cd_demo_sk#143, cd_marital_status#144] + +(142) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_cdemo_sk#132] +Right keys [1]: [cd_demo_sk#143] +Join type: Inner +Join condition: NOT (cd_marital_status#142 = cd_marital_status#144) + +(143) Project [codegen id : 39] +Output [14]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140] +Input [18]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_cdemo_sk#132, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, cd_marital_status#142, cd_demo_sk#143, cd_marital_status#144] + +(144) ReusedExchange [Reuses operator id: 72] +Output [1]: [p_promo_sk#145] + +(145) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_promo_sk#111] +Right keys [1]: [p_promo_sk#145] +Join type: Inner +Join condition: None + +(146) Project [codegen id : 39] +Output [13]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, p_promo_sk#145] + +(147) ReusedExchange [Reuses operator id: 78] +Output [2]: [hd_demo_sk#146, hd_income_band_sk#147] + +(148) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_hdemo_sk#108] +Right keys [1]: [hd_demo_sk#146] +Join type: Inner +Join condition: None + +(149) Project [codegen id : 39] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, hd_income_band_sk#147] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, hd_demo_sk#146, hd_income_band_sk#147] + +(150) ReusedExchange [Reuses operator id: 78] +Output [2]: [hd_demo_sk#148, hd_income_band_sk#149] + +(151) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_hdemo_sk#133] +Right keys [1]: [hd_demo_sk#148] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 39] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_addr_sk#134, d_year#138, d_year#140, hd_income_band_sk#147, hd_income_band_sk#149] +Input [15]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_hdemo_sk#133, c_current_addr_sk#134, d_year#138, d_year#140, hd_income_band_sk#147, hd_demo_sk#148, hd_income_band_sk#149] + +(153) ReusedExchange [Reuses operator id: 87] +Output [5]: [ca_address_sk#150, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154] + +(154) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_addr_sk#109] +Right keys [1]: [ca_address_sk#150] +Join type: Inner +Join condition: None + +(155) Project [codegen id : 39] +Output [16]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_addr_sk#134, d_year#138, d_year#140, hd_income_band_sk#147, hd_income_band_sk#149, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154] +Input [18]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_addr_sk#134, d_year#138, d_year#140, hd_income_band_sk#147, hd_income_band_sk#149, ca_address_sk#150, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154] + +(156) ReusedExchange [Reuses operator id: 87] +Output [5]: [ca_address_sk#155, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159] + +(157) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_addr_sk#134] +Right keys [1]: [ca_address_sk#155] +Join type: Inner +Join condition: None + +(158) Project [codegen id : 39] +Output [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, d_year#138, d_year#140, hd_income_band_sk#147, hd_income_band_sk#149, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159] +Input [21]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, c_current_addr_sk#134, d_year#138, d_year#140, hd_income_band_sk#147, hd_income_band_sk#149, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_address_sk#155, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159] + +(159) ReusedExchange [Reuses operator id: 72] +Output [1]: [ib_income_band_sk#160] + +(160) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [hd_income_band_sk#147] +Right keys [1]: [ib_income_band_sk#160] +Join type: Inner +Join condition: None + +(161) Project [codegen id : 39] +Output [18]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, d_year#138, d_year#140, hd_income_band_sk#149, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159] +Input [20]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, d_year#138, d_year#140, hd_income_band_sk#147, hd_income_band_sk#149, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, ib_income_band_sk#160] + +(162) ReusedExchange [Reuses operator id: 72] +Output [1]: [ib_income_band_sk#161] + +(163) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [hd_income_band_sk#149] +Right keys [1]: [ib_income_band_sk#161] +Join type: Inner +Join condition: None + +(164) Project [codegen id : 39] +Output [17]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, d_year#138, d_year#140, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, d_year#138, d_year#140, hd_income_band_sk#149, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, ib_income_band_sk#161] + +(165) ReusedExchange [Reuses operator id: 103] +Output [2]: [i_item_sk#162, i_product_name#163] + +(166) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [i_item_sk#162] +Join type: Inner +Join condition: None + +(167) Project [codegen id : 39] +Output [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, d_year#138, d_year#140, s_store_name#129, s_zip#130, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, i_item_sk#162, i_product_name#163] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, s_store_name#129, s_zip#130, d_year#138, d_year#140, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, i_item_sk#162, i_product_name#163] + +(168) HashAggregate [codegen id : 39] +Input [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#127, d_year#138, d_year#140, s_store_name#129, s_zip#130, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, i_item_sk#162, i_product_name#163] +Keys [15]: [i_product_name#163, i_item_sk#162, s_store_name#129, s_zip#130, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, d_year#127, d_year#138, d_year#140] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#112)), partial_sum(UnscaledValue(ss_list_price#113)), partial_sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count#76, sum#164, sum#165, sum#166] +Results [19]: [i_product_name#163, i_item_sk#162, s_store_name#129, s_zip#130, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, d_year#127, d_year#138, d_year#140, count#80, sum#167, sum#168, sum#169] + +(169) HashAggregate [codegen id : 39] +Input [19]: [i_product_name#163, i_item_sk#162, s_store_name#129, s_zip#130, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, d_year#127, d_year#138, d_year#140, count#80, sum#167, sum#168, sum#169] +Keys [15]: [i_product_name#163, i_item_sk#162, s_store_name#129, s_zip#130, ca_street_number#151, ca_street_name#152, ca_city#153, ca_zip#154, ca_street_number#156, ca_street_name#157, ca_city#158, ca_zip#159, d_year#127, d_year#138, d_year#140] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#112)), sum(UnscaledValue(ss_list_price#113)), sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#112))#85, sum(UnscaledValue(ss_list_price#113))#86, sum(UnscaledValue(ss_coupon_amt#114))#87] +Results [8]: [i_item_sk#162 AS item_sk#170, s_store_name#129 AS store_name#171, s_zip#130 AS store_zip#172, d_year#127 AS syear#173, count(1)#84 AS cnt#174, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#112))#85,17,2) AS s1#175, MakeDecimal(sum(UnscaledValue(ss_list_price#113))#86,17,2) AS s2#176, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#114))#87,17,2) AS s3#177] + +(170) Exchange +Input [8]: [item_sk#170, store_name#171, store_zip#172, syear#173, cnt#174, s1#175, s2#176, s3#177] +Arguments: hashpartitioning(item_sk#170, store_name#171, store_zip#172, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(171) Sort [codegen id : 40] +Input [8]: [item_sk#170, store_name#171, store_zip#172, syear#173, cnt#174, s1#175, s2#176, s3#177] +Arguments: [item_sk#170 ASC NULLS FIRST, store_name#171 ASC NULLS FIRST, store_zip#172 ASC NULLS FIRST], false, 0 + +(172) SortMergeJoin [codegen id : 41] +Left keys [3]: [item_sk#89, store_name#90, store_zip#91] +Right keys [3]: [item_sk#170, store_name#171, store_zip#172] +Join type: Inner +Join condition: (cnt#174 <= cnt#101) + +(173) Project [codegen id : 41] +Output [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#175, s2#176, s3#177, syear#173, cnt#174] +Input [25]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, item_sk#170, store_name#171, store_zip#172, syear#173, cnt#174, s1#175, s2#176, s3#177] + +(174) CometColumnarExchange +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#175, s2#176, s3#177, syear#173, cnt#174] +Arguments: rangepartitioning(product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#174 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=17] + +(175) CometSort +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#175, s2#176, s3#177, syear#173, cnt#174] +Arguments: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#175, s2#176, s3#177, syear#173, cnt#174], [product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#174 ASC NULLS FIRST] + +(176) ColumnarToRow [codegen id : 42] +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#175, s2#176, s3#177, syear#173, cnt#174] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/simplified.txt new file mode 100644 index 0000000000..dae7ae193a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q64.native_datafusion/simplified.txt @@ -0,0 +1,251 @@ +WholeStageCodegen (42) + ColumnarToRow + InputAdapter + CometSort [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometColumnarExchange [product_name,store_name,cnt] #1 + WholeStageCodegen (41) + Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + SortMergeJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] + InputAdapter + WholeStageCodegen (20) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (19) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometExchange [ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #4 + CometFilter [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (3) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (2) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_ext_list_price,cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSort [cs_item_sk,cs_order_number,cs_ext_list_price] + CometExchange [cs_item_sk,cs_order_number] #6 + CometProject [cs_item_sk,cs_order_number,cs_ext_list_price] + CometFilter [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometExchange [cr_item_sk,cr_order_number] #7 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_store_name,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (9) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_marital_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (11) + ColumnarToRow + InputAdapter + CometFilter [p_promo_sk] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (12) + ColumnarToRow + InputAdapter + CometFilter [hd_demo_sk,hd_income_band_sk] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (14) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #13 + InputAdapter + ReusedExchange [ib_income_band_sk] #13 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (18) + ColumnarToRow + InputAdapter + CometProject [i_item_sk,i_product_name] + CometFilter [i_item_sk,i_current_price,i_color,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + WholeStageCodegen (40) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #17 + WholeStageCodegen (39) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (21) + ColumnarToRow + InputAdapter + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + InputAdapter + WholeStageCodegen (23) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (24) + ColumnarToRow + InputAdapter + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [p_promo_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #13 + InputAdapter + ReusedExchange [ib_income_band_sk] #13 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #16 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/explain.txt new file mode 100644 index 0000000000..5b68aeb7f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/explain.txt @@ -0,0 +1,234 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * ColumnarToRow (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (1) + : : +- BroadcastExchange (17) + : : +- * Filter (16) + : : +- * HashAggregate (15) + : : +- Exchange (14) + : : +- * HashAggregate (13) + : : +- * ColumnarToRow (12) + : : +- CometProject (11) + : : +- CometBroadcastHashJoin (10) + : : :- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (4) + : : +- CometBroadcastExchange (9) + : : +- CometProject (8) + : : +- CometFilter (7) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (6) + : +- BroadcastExchange (23) + : +- * ColumnarToRow (22) + : +- CometFilter (21) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (20) + +- BroadcastExchange (39) + +- * Filter (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * HashAggregate (34) + +- Exchange (33) + +- * HashAggregate (32) + +- * ColumnarToRow (31) + +- CometProject (30) + +- CometBroadcastHashJoin (29) + :- CometFilter (27) + : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (26) + +- ReusedExchange (28) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#1, s_store_name#2] +Arguments: [s_store_sk#1, s_store_name#2] + +(2) CometFilter +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(3) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#1, s_store_name#2] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(5) CometFilter +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : (isnotnull(ss_store_sk#4) AND isnotnull(ss_item_sk#3)) + +(6) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#7, d_month_seq#8] +Arguments: [d_date_sk#7, d_month_seq#8] + +(7) CometFilter +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(8) CometProject +Input [2]: [d_date_sk#7, d_month_seq#8] +Arguments: [d_date_sk#7], [d_date_sk#7] + +(9) CometBroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: [d_date_sk#7] + +(10) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Right output [1]: [d_date_sk#7] +Arguments: [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + +(11) CometProject +Input [5]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6, d_date_sk#7] +Arguments: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5], [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] + +(12) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] + +(13) HashAggregate [codegen id : 1] +Input [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#5))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] + +(14) Exchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(15) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#11] +Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#11,17,2) AS revenue#12] + +(16) Filter [codegen id : 2] +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Condition : isnotnull(revenue#12) + +(17) BroadcastExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(18) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_store_sk#1] +Right keys [1]: [ss_store_sk#4] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 7] +Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] + +(20) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(21) CometFilter +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Condition : isnotnull(i_item_sk#13) + +(22) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(23) BroadcastExchange +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 7] +Output [7]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12, i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Arguments: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] + +(27) CometFilter +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Condition : isnotnull(ss_store_sk#19) + +(28) ReusedExchange [Reuses operator id: 9] +Output [1]: [d_date_sk#22] + +(29) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Right output [1]: [d_date_sk#22] +Arguments: [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + +(30) CometProject +Input [5]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21, d_date_sk#22] +Arguments: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20], [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] + +(31) ColumnarToRow [codegen id : 4] +Input [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] + +(32) HashAggregate [codegen id : 4] +Input [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum#23] +Results [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] + +(33) Exchange +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Arguments: hashpartitioning(ss_store_sk#19, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(34) HashAggregate [codegen id : 5] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#20))#25] +Results [2]: [ss_store_sk#19, MakeDecimal(sum(UnscaledValue(ss_sales_price#20))#25,17,2) AS revenue#26] + +(35) HashAggregate [codegen id : 5] +Input [2]: [ss_store_sk#19, revenue#26] +Keys [1]: [ss_store_sk#19] +Functions [1]: [partial_avg(revenue#26)] +Aggregate Attributes [2]: [sum#27, count#28] +Results [3]: [ss_store_sk#19, sum#29, count#30] + +(36) Exchange +Input [3]: [ss_store_sk#19, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Keys [1]: [ss_store_sk#19] +Functions [1]: [avg(revenue#26)] +Aggregate Attributes [1]: [avg(revenue#26)#31] +Results [2]: [ss_store_sk#19, avg(revenue#26)#31 AS ave#32] + +(38) Filter [codegen id : 6] +Input [2]: [ss_store_sk#19, ave#32] +Condition : isnotnull(ave#32) + +(39) BroadcastExchange +Input [2]: [ss_store_sk#19, ave#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(40) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [ss_store_sk#19] +Join type: Inner +Join condition: (cast(revenue#12 as decimal(23,7)) <= (0.1 * ave#32)) + +(41) Project [codegen id : 7] +Output [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17, ss_store_sk#19, ave#32] + +(42) TakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST], [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bf94064740 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q65.native_datafusion/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + WholeStageCodegen (7) + Project [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] + Project [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_store_sk,ss_item_sk,revenue] + BroadcastHashJoin [s_store_sk,ss_store_sk] + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (2) + Filter [revenue] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_item_sk,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Filter [ave] + HashAggregate [ss_store_sk,sum,count] [avg(revenue),ave,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen (5) + HashAggregate [ss_store_sk,revenue] [sum,count,sum,count] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #7 + WholeStageCodegen (4) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_item_sk,ss_store_sk,ss_sales_price] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/explain.txt new file mode 100644 index 0000000000..127adabf18 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (35) ++- * HashAggregate (34) + +- Exchange (33) + +- * HashAggregate (32) + +- Union (31) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * ColumnarToRow (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (16) + : : +- CometProject (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (13) + : +- CometBroadcastExchange (22) + : +- CometProject (21) + : +- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`ship_mode` (19) + +- * HashAggregate (30) + +- ReusedExchange (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_warehouse_sk#3) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_ship_mode_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(4) CometFilter +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(5) CometBroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) CometBroadcastHashJoin +Left output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Right output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [ws_warehouse_sk#3], [w_warehouse_sk#8], Inner, BuildRight + +(7) CometProject +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14], [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [d_date_sk#15, d_year#16, d_moy#17] + +(9) CometFilter +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [d_date_sk#15, d_year#16, d_moy#17] + +(11) CometBroadcastHashJoin +Left output [12]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Right output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: [ws_sold_date_sk#7], [d_date_sk#15], Inner, BuildRight + +(12) CometProject +Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#15, d_year#16, d_moy#17] +Arguments: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(13) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [2]: [t_time_sk#18, t_time#19] +Arguments: [t_time_sk#18, t_time#19] + +(14) CometFilter +Input [2]: [t_time_sk#18, t_time#19] +Condition : (((isnotnull(t_time#19) AND (t_time#19 >= 30838)) AND (t_time#19 <= 59638)) AND isnotnull(t_time_sk#18)) + +(15) CometProject +Input [2]: [t_time_sk#18, t_time#19] +Arguments: [t_time_sk#18], [t_time_sk#18] + +(16) CometBroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: [t_time_sk#18] + +(17) CometBroadcastHashJoin +Left output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Right output [1]: [t_time_sk#18] +Arguments: [ws_sold_time_sk#1], [t_time_sk#18], Inner, BuildRight + +(18) CometProject +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, t_time_sk#18] +Arguments: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(19) CometNativeScan: `spark_catalog`.`default`.`ship_mode` +Output [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Arguments: [sm_ship_mode_sk#20, sm_carrier#21] + +(20) CometFilter +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Condition : (sm_carrier#21 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#20)) + +(21) CometProject +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Arguments: [sm_ship_mode_sk#20], [sm_ship_mode_sk#20] + +(22) CometBroadcastExchange +Input [1]: [sm_ship_mode_sk#20] +Arguments: [sm_ship_mode_sk#20] + +(23) CometBroadcastHashJoin +Left output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Right output [1]: [sm_ship_mode_sk#20] +Arguments: [ws_ship_mode_sk#2], [sm_ship_mode_sk#20], Inner, BuildRight + +(24) CometProject +Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, sm_ship_mode_sk#20] +Arguments: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17], [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(25) ColumnarToRow [codegen id : 1] +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] + +(26) HashAggregate [codegen id : 1] +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] + +(27) Exchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(28) HashAggregate [codegen id : 2] +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#118, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#119, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#120, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#121, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#122, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#141] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#142, d_year#16 AS year#143, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#118 AS jan_sales#144, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#119 AS feb_sales#145, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#120 AS mar_sales#146, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#121 AS apr_sales#147, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#122 AS may_sales#148, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#123 AS jun_sales#149, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#124 AS jul_sales#150, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#125 AS aug_sales#151, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#126 AS sep_sales#152, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#127 AS oct_sales#153, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#128 AS nov_sales#154, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#129 AS dec_sales#155, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#130 AS jan_net#156, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#131 AS feb_net#157, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#132 AS mar_net#158, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#133 AS apr_net#159, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#134 AS may_net#160, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#135 AS jun_net#161, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#136 AS jul_net#162, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#137 AS aug_net#163, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#138 AS sep_net#164, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#139 AS oct_net#165, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#140 AS nov_net#166, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#141 AS dec_net#167] + +(29) ReusedExchange [Reuses operator id: 27] +Output [55]: [w_warehouse_name#168, w_warehouse_sq_ft#169, w_city#170, w_county#171, w_state#172, w_country#173, d_year#174, sum#175, isEmpty#176, sum#177, isEmpty#178, sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222] + +(30) HashAggregate [codegen id : 4] +Input [55]: [w_warehouse_name#168, w_warehouse_sq_ft#169, w_city#170, w_county#171, w_state#172, w_country#173, d_year#174, sum#175, isEmpty#176, sum#177, isEmpty#178, sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222] +Keys [7]: [w_warehouse_name#168, w_warehouse_sq_ft#169, w_city#170, w_county#171, w_state#172, w_country#173, d_year#174] +Functions [24]: [sum(CASE WHEN (d_moy#223 = 1) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 2) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 3) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 4) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 5) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 6) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 7) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 8) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 9) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 10) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 11) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 12) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 1) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 2) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 3) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 4) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 5) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 6) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 7) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 8) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 9) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 10) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 11) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#223 = 12) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#223 = 1) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#227, sum(CASE WHEN (d_moy#223 = 2) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#228, sum(CASE WHEN (d_moy#223 = 3) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#229, sum(CASE WHEN (d_moy#223 = 4) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#230, sum(CASE WHEN (d_moy#223 = 5) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#231, sum(CASE WHEN (d_moy#223 = 6) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#232, sum(CASE WHEN (d_moy#223 = 7) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#233, sum(CASE WHEN (d_moy#223 = 8) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#234, sum(CASE WHEN (d_moy#223 = 9) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#235, sum(CASE WHEN (d_moy#223 = 10) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#236, sum(CASE WHEN (d_moy#223 = 11) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#237, sum(CASE WHEN (d_moy#223 = 12) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#238, sum(CASE WHEN (d_moy#223 = 1) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#239, sum(CASE WHEN (d_moy#223 = 2) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#240, sum(CASE WHEN (d_moy#223 = 3) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#241, sum(CASE WHEN (d_moy#223 = 4) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#242, sum(CASE WHEN (d_moy#223 = 5) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#243, sum(CASE WHEN (d_moy#223 = 6) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#244, sum(CASE WHEN (d_moy#223 = 7) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#245, sum(CASE WHEN (d_moy#223 = 8) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#246, sum(CASE WHEN (d_moy#223 = 9) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#247, sum(CASE WHEN (d_moy#223 = 10) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#248, sum(CASE WHEN (d_moy#223 = 11) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#249, sum(CASE WHEN (d_moy#223 = 12) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#250] +Results [32]: [w_warehouse_name#168, w_warehouse_sq_ft#169, w_city#170, w_county#171, w_state#172, w_country#173, DHL,BARIAN AS ship_carriers#251, d_year#174 AS year#252, sum(CASE WHEN (d_moy#223 = 1) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#227 AS jan_sales#253, sum(CASE WHEN (d_moy#223 = 2) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#228 AS feb_sales#254, sum(CASE WHEN (d_moy#223 = 3) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#229 AS mar_sales#255, sum(CASE WHEN (d_moy#223 = 4) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#230 AS apr_sales#256, sum(CASE WHEN (d_moy#223 = 5) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#231 AS may_sales#257, sum(CASE WHEN (d_moy#223 = 6) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#232 AS jun_sales#258, sum(CASE WHEN (d_moy#223 = 7) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#233 AS jul_sales#259, sum(CASE WHEN (d_moy#223 = 8) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#234 AS aug_sales#260, sum(CASE WHEN (d_moy#223 = 9) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#235 AS sep_sales#261, sum(CASE WHEN (d_moy#223 = 10) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#236 AS oct_sales#262, sum(CASE WHEN (d_moy#223 = 11) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#237 AS nov_sales#263, sum(CASE WHEN (d_moy#223 = 12) THEN (cs_sales_price#224 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#238 AS dec_sales#264, sum(CASE WHEN (d_moy#223 = 1) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#239 AS jan_net#265, sum(CASE WHEN (d_moy#223 = 2) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#240 AS feb_net#266, sum(CASE WHEN (d_moy#223 = 3) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#241 AS mar_net#267, sum(CASE WHEN (d_moy#223 = 4) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#242 AS apr_net#268, sum(CASE WHEN (d_moy#223 = 5) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#243 AS may_net#269, sum(CASE WHEN (d_moy#223 = 6) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#244 AS jun_net#270, sum(CASE WHEN (d_moy#223 = 7) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#245 AS jul_net#271, sum(CASE WHEN (d_moy#223 = 8) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#246 AS aug_net#272, sum(CASE WHEN (d_moy#223 = 9) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#247 AS sep_net#273, sum(CASE WHEN (d_moy#223 = 10) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#248 AS oct_net#274, sum(CASE WHEN (d_moy#223 = 11) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#249 AS nov_net#275, sum(CASE WHEN (d_moy#223 = 12) THEN (cs_net_paid_inc_tax#226 * cast(cs_quantity#225 as decimal(10,0))) ELSE 0.00 END)#250 AS dec_net#276] + +(31) Union + +(32) HashAggregate [codegen id : 5] +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#144, feb_sales#145, mar_sales#146, apr_sales#147, may_sales#148, jun_sales#149, jul_sales#150, aug_sales#151, sep_sales#152, oct_sales#153, nov_sales#154, dec_sales#155, jan_net#156, feb_net#157, mar_net#158, apr_net#159, may_net#160, jun_net#161, jul_net#162, aug_net#163, sep_net#164, oct_net#165, nov_net#166, dec_net#167] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143] +Functions [36]: [partial_sum(jan_sales#144), partial_sum(feb_sales#145), partial_sum(mar_sales#146), partial_sum(apr_sales#147), partial_sum(may_sales#148), partial_sum(jun_sales#149), partial_sum(jul_sales#150), partial_sum(aug_sales#151), partial_sum(sep_sales#152), partial_sum(oct_sales#153), partial_sum(nov_sales#154), partial_sum(dec_sales#155), partial_sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum(jan_net#156), partial_sum(feb_net#157), partial_sum(mar_net#158), partial_sum(apr_net#159), partial_sum(may_net#160), partial_sum(jun_net#161), partial_sum(jul_net#162), partial_sum(aug_net#163), partial_sum(sep_net#164), partial_sum(oct_net#165), partial_sum(nov_net#166), partial_sum(dec_net#167)] +Aggregate Attributes [72]: [sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282, sum#283, isEmpty#284, sum#285, isEmpty#286, sum#287, isEmpty#288, sum#289, isEmpty#290, sum#291, isEmpty#292, sum#293, isEmpty#294, sum#295, isEmpty#296, sum#297, isEmpty#298, sum#299, isEmpty#300, sum#301, isEmpty#302, sum#303, isEmpty#304, sum#305, isEmpty#306, sum#307, isEmpty#308, sum#309, isEmpty#310, sum#311, isEmpty#312, sum#313, isEmpty#314, sum#315, isEmpty#316, sum#317, isEmpty#318, sum#319, isEmpty#320, sum#321, isEmpty#322, sum#323, isEmpty#324, sum#325, isEmpty#326, sum#327, isEmpty#328, sum#329, isEmpty#330, sum#331, isEmpty#332, sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348] +Results [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420] + +(33) Exchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(34) HashAggregate [codegen id : 6] +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143] +Functions [36]: [sum(jan_sales#144), sum(feb_sales#145), sum(mar_sales#146), sum(apr_sales#147), sum(may_sales#148), sum(jun_sales#149), sum(jul_sales#150), sum(aug_sales#151), sum(sep_sales#152), sum(oct_sales#153), sum(nov_sales#154), sum(dec_sales#155), sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum(jan_net#156), sum(feb_net#157), sum(mar_net#158), sum(apr_net#159), sum(may_net#160), sum(jun_net#161), sum(jul_net#162), sum(aug_net#163), sum(sep_net#164), sum(oct_net#165), sum(nov_net#166), sum(dec_net#167)] +Aggregate Attributes [36]: [sum(jan_sales#144)#421, sum(feb_sales#145)#422, sum(mar_sales#146)#423, sum(apr_sales#147)#424, sum(may_sales#148)#425, sum(jun_sales#149)#426, sum(jul_sales#150)#427, sum(aug_sales#151)#428, sum(sep_sales#152)#429, sum(oct_sales#153)#430, sum(nov_sales#154)#431, sum(dec_sales#155)#432, sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#433, sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#434, sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#435, sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#436, sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#437, sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#438, sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#439, sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#440, sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#441, sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#442, sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#443, sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#444, sum(jan_net#156)#445, sum(feb_net#157)#446, sum(mar_net#158)#447, sum(apr_net#159)#448, sum(may_net#160)#449, sum(jun_net#161)#450, sum(jul_net#162)#451, sum(aug_net#163)#452, sum(sep_net#164)#453, sum(oct_net#165)#454, sum(nov_net#166)#455, sum(dec_net#167)#456] +Results [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum(jan_sales#144)#421 AS jan_sales#457, sum(feb_sales#145)#422 AS feb_sales#458, sum(mar_sales#146)#423 AS mar_sales#459, sum(apr_sales#147)#424 AS apr_sales#460, sum(may_sales#148)#425 AS may_sales#461, sum(jun_sales#149)#426 AS jun_sales#462, sum(jul_sales#150)#427 AS jul_sales#463, sum(aug_sales#151)#428 AS aug_sales#464, sum(sep_sales#152)#429 AS sep_sales#465, sum(oct_sales#153)#430 AS oct_sales#466, sum(nov_sales#154)#431 AS nov_sales#467, sum(dec_sales#155)#432 AS dec_sales#468, sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#433 AS jan_sales_per_sq_foot#469, sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#434 AS feb_sales_per_sq_foot#470, sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#435 AS mar_sales_per_sq_foot#471, sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#436 AS apr_sales_per_sq_foot#472, sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#437 AS may_sales_per_sq_foot#473, sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#438 AS jun_sales_per_sq_foot#474, sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#439 AS jul_sales_per_sq_foot#475, sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#440 AS aug_sales_per_sq_foot#476, sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#441 AS sep_sales_per_sq_foot#477, sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#442 AS oct_sales_per_sq_foot#478, sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#443 AS nov_sales_per_sq_foot#479, sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#444 AS dec_sales_per_sq_foot#480, sum(jan_net#156)#445 AS jan_net#481, sum(feb_net#157)#446 AS feb_net#482, sum(mar_net#158)#447 AS mar_net#483, sum(apr_net#159)#448 AS apr_net#484, sum(may_net#160)#449 AS may_net#485, sum(jun_net#161)#450 AS jun_net#486, sum(jul_net#162)#451 AS jul_net#487, sum(aug_net#163)#452 AS aug_net#488, sum(sep_net#164)#453 AS sep_net#489, sum(oct_net#165)#454 AS oct_net#490, sum(nov_net#166)#455 AS nov_net#491, sum(dec_net#167)#456 AS dec_net#492] + +(35) TakeOrderedAndProject +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#457, feb_sales#458, mar_sales#459, apr_sales#460, may_sales#461, jun_sales#462, jul_sales#463, aug_sales#464, sep_sales#465, oct_sales#466, nov_sales#467, dec_sales#468, jan_sales_per_sq_foot#469, feb_sales_per_sq_foot#470, mar_sales_per_sq_foot#471, apr_sales_per_sq_foot#472, may_sales_per_sq_foot#473, jun_sales_per_sq_foot#474, jul_sales_per_sq_foot#475, aug_sales_per_sq_foot#476, sep_sales_per_sq_foot#477, oct_sales_per_sq_foot#478, nov_sales_per_sq_foot#479, dec_sales_per_sq_foot#480, jan_net#481, feb_net#482, mar_net#483, apr_net#484, may_net#485, jun_net#486, jul_net#487, aug_net#488, sep_net#489, oct_net#490, nov_net#491, dec_net#492] +Arguments: 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#457, feb_sales#458, mar_sales#459, apr_sales#460, may_sales#461, jun_sales#462, jul_sales#463, aug_sales#464, sep_sales#465, oct_sales#466, nov_sales#467, dec_sales#468, jan_sales_per_sq_foot#469, feb_sales_per_sq_foot#470, mar_sales_per_sq_foot#471, apr_sales_per_sq_foot#472, may_sales_per_sq_foot#473, jun_sales_per_sq_foot#474, jul_sales_per_sq_foot#475, aug_sales_per_sq_foot#476, sep_sales_per_sq_foot#477, oct_sales_per_sq_foot#478, nov_sales_per_sq_foot#479, dec_sales_per_sq_foot#480, jan_net#481, feb_net#482, mar_net#483, apr_net#484, may_net#485, jun_net#486, jul_net#487, aug_net#488, sep_net#489, oct_net#490, nov_net#491, dec_net#492] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c9f9d8272e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q66.native_datafusion/simplified.txt @@ -0,0 +1,45 @@ +TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + WholeStageCodegen (6) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum((jan_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((feb_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((mar_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((apr_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((may_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jun_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jul_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((aug_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((sep_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((oct_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((nov_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((dec_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 + WholeStageCodegen (5) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (2) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 + WholeStageCodegen (1) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,ws_ext_sales_price,ws_quantity,ws_net_paid] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,sm_ship_mode_sk] + CometProject [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,t_time_sk] + CometProject [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_date_sk,d_year,d_moy] + CometProject [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometFilter [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 + CometFilter [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [t_time_sk] #5 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_time] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_time] + CometBroadcastExchange [sm_ship_mode_sk] #6 + CometProject [sm_ship_mode_sk] + CometFilter [sm_ship_mode_sk,sm_carrier] + CometNativeScan: `spark_catalog`.`default`.`ship_mode` [sm_ship_mode_sk,sm_carrier] + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/explain.txt new file mode 100644 index 0000000000..afa4e7c07c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/explain.txt @@ -0,0 +1,166 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- WindowGroupLimit (28) + +- * Sort (27) + +- Exchange (26) + +- WindowGroupLimit (25) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * ColumnarToRow (20) + +- CometExpand (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`item` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(4) CometFilter +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10], [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(6) CometBroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Right output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10], [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(10) CometFilter +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(11) CometBroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Right output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(13) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] +Arguments: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12], [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] + +(14) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(15) CometFilter +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(16) CometBroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(17) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Right output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_item_sk#1], [i_item_sk#13], Inner, BuildRight + +(18) CometProject +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] + +(19) CometExpand +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] + +(20) ColumnarToRow [codegen id : 1] +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] + +(21) HashAggregate [codegen id : 1] +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] + +(22) Exchange +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(23) HashAggregate [codegen id : 2] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#31] +Results [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#31 AS sumsales#32] + +(24) Sort [codegen id : 2] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#32 DESC NULLS LAST], false, 0 + +(25) WindowGroupLimit +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18], [sumsales#32 DESC NULLS LAST], rank(sumsales#32), 100, Partial + +(26) Exchange +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(27) Sort [codegen id : 3] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#32 DESC NULLS LAST], false, 0 + +(28) WindowGroupLimit +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18], [sumsales#32 DESC NULLS LAST], rank(sumsales#32), 100, Final + +(29) Window +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [rank(sumsales#32) windowspecdefinition(i_category#18, sumsales#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#33], [i_category#18], [sumsales#32 DESC NULLS LAST] + +(30) Filter [codegen id : 4] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Condition : (rk#33 <= 100) + +(31) TakeOrderedAndProject +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#32 ASC NULLS FIRST, rk#33 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/simplified.txt new file mode 100644 index 0000000000..42308b5b6e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q67.native_datafusion/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (4) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (3) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (2) + Sort [i_category,sumsales] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 + WholeStageCodegen (1) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExpand [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] + CometProject [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy,d_qoy] + CometFilter [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy,d_qoy] #3 + CometProject [d_date_sk,d_year,d_moy,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #5 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/explain.txt new file mode 100644 index 0000000000..deb0db7cd8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/explain.txt @@ -0,0 +1,224 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * HashAggregate (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- * ColumnarToRow (26) + : : +- CometProject (25) + : : +- CometBroadcastHashJoin (24) + : : :- CometProject (20) + : : : +- CometBroadcastHashJoin (19) + : : : :- CometProject (14) + : : : : +- CometBroadcastHashJoin (13) + : : : : :- CometProject (8) + : : : : : +- CometBroadcastHashJoin (7) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : +- CometBroadcastExchange (6) + : : : : : +- CometProject (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : : +- CometBroadcastExchange (12) + : : : : +- CometProject (11) + : : : : +- CometFilter (10) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : : : +- CometBroadcastExchange (18) + : : : +- CometProject (17) + : : : +- CometFilter (16) + : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + : : +- CometBroadcastExchange (23) + : : +- CometFilter (22) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (21) + : +- BroadcastExchange (33) + : +- * ColumnarToRow (32) + : +- CometFilter (31) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (30) + +- BroadcastExchange (39) + +- * ColumnarToRow (38) + +- CometFilter (37) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (36) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] + +(2) CometFilter +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Arguments: [d_date_sk#10, d_year#11, d_dom#12] + +(4) CometFilter +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(5) CometProject +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(7) CometBroadcastHashJoin +Left output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + +(8) CometProject +Input [10]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9, d_date_sk#10] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#13, s_city#14] +Arguments: [s_store_sk#13, s_city#14] + +(10) CometFilter +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Fairview) AND isnotnull(s_store_sk#13)) + +(11) CometProject +Input [2]: [s_store_sk#13, s_city#14] +Arguments: [s_store_sk#13], [s_store_sk#13] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: [s_store_sk#13] + +(13) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [1]: [s_store_sk#13] +Arguments: [ss_store_sk#4], [s_store_sk#13], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#13] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(16) CometFilter +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 4) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) + +(17) CometProject +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15], [hd_demo_sk#15] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: [hd_demo_sk#15] + +(19) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [1]: [hd_demo_sk#15] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#15], Inner, BuildRight + +(20) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] + +(21) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ca_address_sk#18, ca_city#19] + +(22) CometFilter +Input [2]: [ca_address_sk#18, ca_city#19] +Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) + +(23) CometBroadcastExchange +Input [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ca_address_sk#18, ca_city#19] + +(24) CometBroadcastHashJoin +Left output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Right output [2]: [ca_address_sk#18, ca_city#19] +Arguments: [ss_addr_sk#3], [ca_address_sk#18], Inner, BuildRight + +(25) CometProject +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#18, ca_city#19] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] + +(26) ColumnarToRow [codegen id : 1] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] + +(27) HashAggregate [codegen id : 1] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] +Aggregate Attributes [3]: [sum#20, sum#21, sum#22] +Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] + +(28) Exchange +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(29) HashAggregate [codegen id : 4] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#26, sum(UnscaledValue(ss_ext_list_price#7))#27, sum(UnscaledValue(ss_ext_tax#8))#28] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#19 AS bought_city#29, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#26,17,2) AS extended_price#30, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#27,17,2) AS list_price#31, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#28,17,2) AS extended_tax#32] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(31) CometFilter +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) + +(32) ColumnarToRow [codegen id : 2] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(33) BroadcastExchange +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(34) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#33] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 4] +Output [8]: [ss_ticket_number#5, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(36) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#37, ca_city#38] +Arguments: [ca_address_sk#37, ca_city#38] + +(37) CometFilter +Input [2]: [ca_address_sk#37, ca_city#38] +Condition : (isnotnull(ca_address_sk#37) AND isnotnull(ca_city#38)) + +(38) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#37, ca_city#38] + +(39) BroadcastExchange +Input [2]: [ca_address_sk#37, ca_city#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(40) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#37] +Join type: Inner +Join condition: NOT (ca_city#38 = bought_city#29) + +(41) Project [codegen id : 4] +Output [8]: [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] +Input [10]: [ss_ticket_number#5, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#37, ca_city#38] + +(42) TakeOrderedAndProject +Input [8]: [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/simplified.txt new file mode 100644 index 0000000000..92eae58c2f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q68.native_datafusion/simplified.txt @@ -0,0 +1,52 @@ +TakeOrderedAndProject [c_last_name,ss_ticket_number,c_first_name,ca_city,bought_city,extended_price,extended_tax,list_price] + WholeStageCodegen (4) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax)),bought_city,extended_price,list_price,extended_tax,sum,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (1) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] [sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] + CometBroadcastHashJoin [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_address_sk,ca_city] + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #3 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_city] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [ca_address_sk,ca_city] #5 + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/explain.txt new file mode 100644 index 0000000000..709357a617 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin LeftAnti BuildRight (21) + : : :- * BroadcastHashJoin LeftAnti BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (27) + : +- * ColumnarToRow (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (23) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#4, ss_sold_date_sk#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6, d_year#7, d_moy#8] + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Arguments: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [ws_bill_customer_sk#9], [ws_bill_customer_sk#9] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#9] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#9] +Join type: LeftAnti +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#12] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#12] +Join type: LeftAnti +Join condition: None + +(22) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(23) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#13, ca_state#14] +Arguments: [ca_address_sk#13, ca_state#14] + +(24) CometFilter +Input [2]: [ca_address_sk#13, ca_state#14] +Condition : (ca_state#14 IN (KY,GA,NM) AND isnotnull(ca_address_sk#13)) + +(25) CometProject +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: [ca_address_sk#13], [ca_address_sk#13] + +(26) ColumnarToRow [codegen id : 3] +Input [1]: [ca_address_sk#13] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#13] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#13] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Arguments: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] + +(31) CometFilter +Input [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Condition : isnotnull(cd_demo_sk#15) + +(32) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] + +(33) BroadcastExchange +Input [6]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#15] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 5] +Output [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] + +(36) HashAggregate [codegen id : 5] +Input [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Keys [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#21] +Results [6]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, count#22] + +(37) Exchange +Input [6]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, count#22] +Arguments: hashpartitioning(cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 6] +Input [6]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20, count#22] +Keys [5]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cd_purchase_estimate#19, cd_credit_rating#20] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#23] +Results [8]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, count(1)#23 AS cnt1#24, cd_purchase_estimate#19, count(1)#23 AS cnt2#25, cd_credit_rating#20, count(1)#23 AS cnt3#26] + +(39) TakeOrderedAndProject +Input [8]: [cd_gender#16, cd_marital_status#17, cd_education_status#18, cnt1#24, cd_purchase_estimate#19, cnt2#25, cd_credit_rating#20, cnt3#26] +Arguments: 100, [cd_gender#16 ASC NULLS FIRST, cd_marital_status#17 ASC NULLS FIRST, cd_education_status#18 ASC NULLS FIRST, cd_purchase_estimate#19 ASC NULLS FIRST, cd_credit_rating#20 ASC NULLS FIRST], [cd_gender#16, cd_marital_status#17, cd_education_status#18, cnt1#24, cd_purchase_estimate#19, cnt2#25, cd_credit_rating#20, cnt3#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f8b77163ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q69.native_datafusion/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cnt1,cnt2,cnt3] + WholeStageCodegen (6) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,count] [count(1),cnt1,cnt2,cnt3,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] #1 + WholeStageCodegen (5) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/explain.txt new file mode 100644 index 0000000000..2802968d2e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/explain.txt @@ -0,0 +1,162 @@ +== Physical Plan == +TakeOrderedAndProject (30) ++- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * ColumnarToRow (26) + +- CometProject (25) + +- CometBroadcastHashJoin (24) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (15) + +- CometBroadcastExchange (23) + +- CometProject (22) + +- CometFilter (21) + +- CometNativeScan: `spark_catalog`.`default`.`promotion` (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(16) CometFilter +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(17) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: [i_item_sk#15, i_item_id#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [i_item_sk#15, i_item_id#16] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] +Arguments: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16], [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(21) CometFilter +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(22) CometProject +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Arguments: [p_promo_sk#17], [p_promo_sk#17] + +(23) CometBroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: [p_promo_sk#17] + +(24) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Right output [1]: [p_promo_sk#17] +Arguments: [ss_promo_sk#3], [p_promo_sk#17], Inner, BuildRight + +(25) CometProject +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] +Arguments: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(26) ColumnarToRow [codegen id : 1] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] + +(27) HashAggregate [codegen id : 1] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] + +(28) Exchange +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(29) HashAggregate [codegen id : 2] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] + +(30) TakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/simplified.txt new file mode 100644 index 0000000000..999cdc29ab --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q7.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (2) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + CometBroadcastHashJoin [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,p_promo_sk] + CometProject [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #4 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [p_promo_sk] #5 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_email,p_channel_event] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/explain.txt new file mode 100644 index 0000000000..eef9f1004c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Expand (37) + +- * Project (36) + +- * BroadcastHashJoin Inner BuildRight (35) + :- * ColumnarToRow (9) + : +- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + +- BroadcastExchange (34) + +- * BroadcastHashJoin LeftSemi BuildRight (33) + :- * ColumnarToRow (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (10) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- Window (29) + +- WindowGroupLimit (28) + +- * Sort (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * ColumnarToRow (23) + +- CometProject (22) + +- CometBroadcastHashJoin (21) + :- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (13) + : +- CometBroadcastExchange (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + +- ReusedExchange (20) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) ColumnarToRow [codegen id : 5] +Input [2]: [ss_store_sk#1, ss_net_profit#2] + +(10) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: [s_store_sk#6, s_county#7, s_state#8] + +(11) CometFilter +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(12) ColumnarToRow [codegen id : 4] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Arguments: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(14) CometFilter +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(16) CometFilter +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(18) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Right output [2]: [s_store_sk#12, s_state#13] +Arguments: [ss_store_sk#9], [s_store_sk#12], Inner, BuildRight + +(19) CometProject +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] +Arguments: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13], [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Right output [1]: [d_date_sk#14] +Arguments: [ss_sold_date_sk#11], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] +Arguments: [ss_net_profit#10, s_state#13], [ss_net_profit#10, s_state#13] + +(23) ColumnarToRow [codegen id : 1] +Input [2]: [ss_net_profit#10, s_state#13] + +(24) HashAggregate [codegen id : 1] +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] + +(25) Exchange +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(26) HashAggregate [codegen id : 2] +Input [2]: [s_state#13, sum#16] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w0#18, s_state#13] + +(27) Sort [codegen id : 2] +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [s_state#13 ASC NULLS FIRST, _w0#18 DESC NULLS LAST], false, 0 + +(28) WindowGroupLimit +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [s_state#13], [_w0#18 DESC NULLS LAST], rank(_w0#18), 5, Final + +(29) Window +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [rank(_w0#18) windowspecdefinition(s_state#13, _w0#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w0#18 DESC NULLS LAST] + +(30) Filter [codegen id : 3] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] +Condition : (ranking#19 <= 5) + +(31) Project [codegen id : 3] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] + +(32) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(33) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(34) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 5] +Output [3]: [ss_net_profit#2, s_state#8, s_county#7] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(37) Expand [codegen id : 5] +Input [3]: [ss_net_profit#2, s_state#8, s_county#7] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] + +(38) HashAggregate [codegen id : 5] +Input [4]: [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#23] +Results [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] + +(39) Exchange +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Arguments: hashpartitioning(s_state#20, s_county#21, spark_grouping_id#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 6] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#25] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS total_sum#26, s_state#20, s_county#21, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS lochierarchy#27, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS _w0#28, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS _w1#29, CASE WHEN (cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint) = 0) THEN s_state#20 END AS _w2#30] + +(41) Exchange +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: hashpartitioning(_w1#29, _w2#30, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(42) Sort [codegen id : 7] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: [_w1#29 ASC NULLS FIRST, _w2#30 ASC NULLS FIRST, _w0#28 DESC NULLS LAST], false, 0 + +(43) Window +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: [rank(_w0#28) windowspecdefinition(_w1#29, _w2#30, _w0#28 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#29, _w2#30], [_w0#28 DESC NULLS LAST] + +(44) Project [codegen id : 8] +Output [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Input [8]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30, rank_within_parent#31] + +(45) TakeOrderedAndProject +Input [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN s_state#20 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/simplified.txt new file mode 100644 index 0000000000..fdf0219cc5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q70.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (8) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (7) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (6) + HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen (5) + HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + BroadcastHashJoin [s_state,s_state] + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_county,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WindowGroupLimit [s_state,_w0] + WholeStageCodegen (2) + Sort [s_state,_w0] + HashAggregate [sum] [sum(UnscaledValue(ss_net_profit)),_w0,s_state,sum] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen (1) + HashAggregate [s_state,ss_net_profit] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_net_profit,s_state] + CometBroadcastHashJoin [ss_net_profit,ss_sold_date_sk,s_state,d_date_sk] + CometProject [ss_net_profit,ss_sold_date_sk,s_state] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,s_store_sk,s_state] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_state] #7 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/explain.txt new file mode 100644 index 0000000000..117ac1349d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/explain.txt @@ -0,0 +1,201 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometSort (37) + +- CometColumnarExchange (36) + +- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * ColumnarToRow (32) + +- CometProject (31) + +- CometBroadcastHashJoin (30) + :- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometBroadcastExchange (4) + : : +- CometProject (3) + : : +- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : +- CometUnion (23) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (5) + : : +- CometBroadcastExchange (10) + : : +- CometProject (9) + : : +- CometFilter (8) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (7) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (13) + : : +- ReusedExchange (15) + : +- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometFilter (19) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (18) + : +- ReusedExchange (20) + +- CometBroadcastExchange (29) + +- CometProject (28) + +- CometFilter (27) + +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(2) CometFilter +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3], [i_item_sk#1, i_brand_id#2, i_brand#3] + +(4) CometBroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: [i_item_sk#1, i_brand_id#2, i_brand#3] + +(5) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Arguments: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] + +(6) CometFilter +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) + +(7) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_year#10, d_moy#11] + +(8) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(9) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(10) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(11) CometBroadcastHashJoin +Left output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ws_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(12) CometProject +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#9] +Arguments: [ext_price#12, sold_item_sk#13, time_sk#14], [ws_ext_sales_price#7 AS ext_price#12, ws_item_sk#6 AS sold_item_sk#13, ws_sold_time_sk#5 AS time_sk#14] + +(13) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Arguments: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(14) CometFilter +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) + +(15) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#19] + +(16) CometBroadcastHashJoin +Left output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cs_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(17) CometProject +Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Arguments: [ext_price#20, sold_item_sk#21, time_sk#22], [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] + +(18) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Arguments: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(19) CometFilter +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) + +(20) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#27] + +(21) CometBroadcastHashJoin +Left output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Right output [1]: [d_date_sk#27] +Arguments: [ss_sold_date_sk#26], [d_date_sk#27], Inner, BuildRight + +(22) CometProject +Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] +Arguments: [ext_price#28, sold_item_sk#29, time_sk#30], [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] + +(23) CometUnion +Child 0 Input [3]: [ext_price#12, sold_item_sk#13, time_sk#14] +Child 1 Input [3]: [ext_price#20, sold_item_sk#21, time_sk#22] +Child 2 Input [3]: [ext_price#28, sold_item_sk#29, time_sk#30] + +(24) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Right output [3]: [ext_price#12, sold_item_sk#13, time_sk#14] +Arguments: [i_item_sk#1], [sold_item_sk#13], Inner, BuildLeft + +(25) CometProject +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] +Arguments: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14], [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] + +(26) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(27) CometFilter +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) + +(28) CometProject +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33], [t_time_sk#31, t_hour#32, t_minute#33] + +(29) CometBroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33] + +(30) CometBroadcastHashJoin +Left output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] +Right output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [time_sk#14], [t_time_sk#31], Inner, BuildRight + +(31) CometProject +Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33], [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] + +(32) ColumnarToRow [codegen id : 1] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] + +(33) HashAggregate [codegen id : 1] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] +Aggregate Attributes [1]: [sum#35] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] + +(34) Exchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(35) HashAggregate [codegen id : 2] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#12))#37] +Results [5]: [i_brand_id#2 AS brand_id#38, i_brand#3 AS brand#39, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#12))#37,17,2) AS ext_price#40] + +(36) CometColumnarExchange +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] +Arguments: rangepartitioning(ext_price#40 DESC NULLS LAST, brand_id#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(37) CometSort +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] +Arguments: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40], [ext_price#40 DESC NULLS LAST, brand_id#38 ASC NULLS FIRST] + +(38) ColumnarToRow [codegen id : 3] +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6fd882ac75 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q71.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [brand_id,brand,t_hour,t_minute,ext_price] + CometColumnarExchange [ext_price,brand_id] #1 + WholeStageCodegen (2) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum] [sum(UnscaledValue(ext_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen (1) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,ext_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [i_brand_id,i_brand,ext_price,t_hour,t_minute] + CometBroadcastHashJoin [i_brand_id,i_brand,ext_price,time_sk,t_time_sk,t_hour,t_minute] + CometProject [i_brand_id,i_brand,ext_price,time_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_brand,ext_price,sold_item_sk,time_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_brand] #3 + CometProject [i_item_sk,i_brand_id,i_brand] + CometFilter [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_brand,i_manager_id] + CometUnion [ext_price,sold_item_sk,time_sk] + CometProject [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,d_date_sk] + CometFilter [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometProject [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,d_date_sk] + CometFilter [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometProject [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] [ext_price,sold_item_sk,time_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,d_date_sk] + CometFilter [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometBroadcastExchange [t_time_sk,t_hour,t_minute] #5 + CometProject [t_time_sk,t_hour,t_minute] + CometFilter [t_time_sk,t_hour,t_minute,t_meal_time] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/explain.txt new file mode 100644 index 0000000000..2ef4c4fa34 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/explain.txt @@ -0,0 +1,323 @@ +== Physical Plan == +* ColumnarToRow (62) ++- CometTakeOrderedAndProject (61) + +- CometHashAggregate (60) + +- CometExchange (59) + +- CometHashAggregate (58) + +- CometProject (57) + +- CometSortMergeJoin (56) + :- CometSort (50) + : +- CometExchange (49) + : +- CometProject (48) + : +- CometBroadcastHashJoin (47) + : :- CometProject (43) + : : +- CometBroadcastHashJoin (42) + : : :- CometProject (38) + : : : +- CometBroadcastHashJoin (37) + : : : :- CometProject (33) + : : : : +- CometBroadcastHashJoin (32) + : : : : :- CometProject (27) + : : : : : +- CometBroadcastHashJoin (26) + : : : : : :- CometProject (21) + : : : : : : +- CometBroadcastHashJoin (20) + : : : : : : :- CometProject (15) + : : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : : :- CometProject (12) + : : : : : : : : +- CometBroadcastHashJoin (11) + : : : : : : : : :- CometProject (7) + : : : : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : : : : :- CometFilter (2) + : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : : : : : +- CometBroadcastExchange (5) + : : : : : : : : : +- CometFilter (4) + : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (3) + : : : : : : : : +- CometBroadcastExchange (10) + : : : : : : : : +- CometFilter (9) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : : : : : : : +- ReusedExchange (13) + : : : : : : +- CometBroadcastExchange (19) + : : : : : : +- CometProject (18) + : : : : : : +- CometFilter (17) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (16) + : : : : : +- CometBroadcastExchange (25) + : : : : : +- CometProject (24) + : : : : : +- CometFilter (23) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (22) + : : : : +- CometBroadcastExchange (31) + : : : : +- CometProject (30) + : : : : +- CometFilter (29) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (28) + : : : +- CometBroadcastExchange (36) + : : : +- CometFilter (35) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (34) + : : +- CometBroadcastExchange (41) + : : +- CometFilter (40) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (39) + : +- CometBroadcastExchange (46) + : +- CometFilter (45) + : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (44) + +- CometSort (55) + +- CometExchange (54) + +- CometProject (53) + +- CometFilter (52) + +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (51) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(4) CometFilter +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(5) CometBroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Right output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_item_sk#4], [inv_item_sk#9], Inner, (inv_quantity_on_hand#11 < cs_quantity#7), BuildRight + +(7) CometProject +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(9) CometFilter +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(11) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Right output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [inv_warehouse_sk#10], [w_warehouse_sk#13], Inner, BuildRight + +(12) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] + +(13) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#15, i_item_desc#16] + +(14) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Right output [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [cs_item_sk#4], [i_item_sk#15], Inner, BuildRight + +(15) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(16) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17, cd_marital_status#18] + +(17) CometFilter +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = D)) AND isnotnull(cd_demo_sk#17)) + +(18) CometProject +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17], [cd_demo_sk#17] + +(19) CometBroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: [cd_demo_sk#17] + +(20) CometBroadcastHashJoin +Left output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [cd_demo_sk#17] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#17], Inner, BuildRight + +(21) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] +Arguments: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(22) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19, hd_buy_potential#20] + +(23) CometFilter +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = >10000 )) AND isnotnull(hd_demo_sk#19)) + +(24) CometProject +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19], [hd_demo_sk#19] + +(25) CometBroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: [hd_demo_sk#19] + +(26) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [hd_demo_sk#19] +Arguments: [cs_bill_hdemo_sk#3], [hd_demo_sk#19], Inner, BuildRight + +(27) CometProject +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(28) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(29) CometFilter +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 1999)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(30) CometProject +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23], [d_date_sk#21, d_date#22, d_week_seq#23] + +(31) CometBroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23] + +(32) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_sold_date_sk#8], [d_date_sk#21], Inner, BuildRight + +(33) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(34) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(35) CometFilter +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(36) CometBroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(37) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_week_seq#23, inv_date_sk#12], [d_week_seq#26, d_date_sk#25], Inner, BuildRight + +(38) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(39) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(40) CometFilter +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(41) CometBroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(42) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#27, d_date#28] +Arguments: [cs_ship_date_sk#1], [d_date_sk#27], Inner, (d_date#28 > date_add(d_date#22, 5)), BuildRight + +(43) CometProject +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] +Arguments: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(44) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(45) CometFilter +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(46) CometBroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(47) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [1]: [p_promo_sk#29] +Arguments: [cs_promo_sk#5], [p_promo_sk#29], LeftOuter, BuildRight + +(48) CometProject +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(49) CometExchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(50) CometSort +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST] + +(51) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(52) CometFilter +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(53) CometProject +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30, cr_order_number#31] + +(54) CometExchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometSort +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST] + +(56) CometSortMergeJoin +Left output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cs_item_sk#4, cs_order_number#6], [cr_item_sk#30, cr_order_number#31], LeftOuter + +(57) CometProject +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] +Arguments: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(58) CometHashAggregate +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] + +(59) CometExchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(60) CometHashAggregate +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] + +(61) CometTakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_cnt#36 DESC NULLS LAST,i_item_desc#16 ASC NULLS FIRST,w_warehouse_name#14 ASC NULLS FIRST,d_week_seq#23 ASC NULLS FIRST], output=[i_item_desc#16,w_warehouse_name#14,d_week_seq#23,no_promo#34,promo#35,total_cnt#36]), [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36], 100, [total_cnt#36 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + +(62) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e70defef63 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q72.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt] + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt,count,count(1)] + CometExchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] + CometProject [w_warehouse_name,i_item_desc,d_week_seq] + CometSortMergeJoin [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,cr_item_sk,cr_order_number] + CometSort [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometExchange [cs_item_sk,cs_order_number] #2 + CometProject [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,p_promo_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_date] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,d_date_sk,d_date,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,hd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,cd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_sk,i_item_desc] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometFilter [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] #3 + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [i_item_sk,i_item_desc] #4 + CometBroadcastExchange [cd_demo_sk] #5 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_marital_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status] + CometBroadcastExchange [hd_demo_sk] #6 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential] + CometBroadcastExchange [d_date_sk,d_date,d_week_seq] #7 + CometProject [d_date_sk,d_date,d_week_seq] + CometFilter [d_date_sk,d_date,d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_week_seq,d_year] + CometBroadcastExchange [d_date_sk,d_week_seq] #8 + CometFilter [d_date_sk,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] + CometBroadcastExchange [d_date_sk,d_date] #9 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [p_promo_sk] #10 + CometFilter [p_promo_sk] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_item_sk,cr_order_number] #11 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/explain.txt new file mode 100644 index 0000000000..13f2c1319f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6, d_year#7, d_dom#8] + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9, s_county#10] + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : (s_county#10 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.0) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 1) AND (cnt#16 <= 5)) + +(25) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(cnt#16 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [cnt#16 DESC NULLS LAST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/simplified.txt new file mode 100644 index 0000000000..35a931bfdf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q73.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [cnt] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/explain.txt new file mode 100644 index 0000000000..e9b83cae94 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/explain.txt @@ -0,0 +1,251 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * BroadcastHashJoin Inner BuildRight (33) + : : :- * Filter (17) + : : : +- * HashAggregate (16) + : : : +- Exchange (15) + : : : +- * HashAggregate (14) + : : : +- * ColumnarToRow (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * ColumnarToRow (28) + : : +- CometProject (27) + : : +- CometBroadcastHashJoin (26) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometFilter (19) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (18) + : : : +- ReusedExchange (20) + : : +- CometBroadcastExchange (25) + : : +- CometFilter (24) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (23) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * HashAggregate (35) + : +- ReusedExchange (34) + +- BroadcastExchange (42) + +- * HashAggregate (41) + +- ReusedExchange (40) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(2) CometFilter +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(5) CometBroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Right output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#1], [ss_customer_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(9) CometFilter +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(11) CometBroadcastHashJoin +Left output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_year#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] + +(13) ColumnarToRow [codegen id : 1] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] + +(14) HashAggregate [codegen id : 1] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum#10] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] + +(15) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(16) HashAggregate [codegen id : 8] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#12] +Results [2]: [c_customer_id#2 AS customer_id#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#12,17,2) AS year_total#14] + +(17) Filter [codegen id : 8] +Input [2]: [customer_id#13, year_total#14] +Condition : (isnotnull(year_total#14) AND (year_total#14 > 0.00)) + +(18) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Arguments: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] + +(19) CometFilter +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_customer_id#16)) + +(20) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(21) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Right output [3]: [ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] +Arguments: [c_customer_sk#15], [ss_customer_sk#19], Inner, BuildRight + +(22) CometProject +Input [7]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18, ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] +Arguments: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21], [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21] + +(23) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22, d_year#23] + +(24) CometFilter +Input [2]: [d_date_sk#22, d_year#23] +Condition : (((isnotnull(d_year#23) AND (d_year#23 = 2002)) AND d_year#23 IN (2001,2002)) AND isnotnull(d_date_sk#22)) + +(25) CometBroadcastExchange +Input [2]: [d_date_sk#22, d_year#23] +Arguments: [d_date_sk#22, d_year#23] + +(26) CometBroadcastHashJoin +Left output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21] +Right output [2]: [d_date_sk#22, d_year#23] +Arguments: [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + +(27) CometProject +Input [7]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21, d_date_sk#22, d_year#23] +Arguments: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23], [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] + +(28) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] + +(29) HashAggregate [codegen id : 2] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum#24] +Results [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] + +(30) Exchange +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Arguments: hashpartitioning(c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(31) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#20))#12] +Results [4]: [c_customer_id#16 AS customer_id#26, c_first_name#17 AS customer_first_name#27, c_last_name#18 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#20))#12,17,2) AS year_total#29] + +(32) BroadcastExchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#26] +Join type: Inner +Join condition: None + +(34) ReusedExchange [Reuses operator id: 15] +Output [5]: [c_customer_id#30, c_first_name#31, c_last_name#32, d_year#33, sum#34] + +(35) HashAggregate [codegen id : 5] +Input [5]: [c_customer_id#30, c_first_name#31, c_last_name#32, d_year#33, sum#34] +Keys [4]: [c_customer_id#30, c_first_name#31, c_last_name#32, d_year#33] +Functions [1]: [sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#35))#36] +Results [2]: [c_customer_id#30 AS customer_id#37, MakeDecimal(sum(UnscaledValue(ws_net_paid#35))#36,17,2) AS year_total#38] + +(36) Filter [codegen id : 5] +Input [2]: [customer_id#37, year_total#38] +Condition : (isnotnull(year_total#38) AND (year_total#38 > 0.00)) + +(37) BroadcastExchange +Input [2]: [customer_id#37, year_total#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#37] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [7]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#38] +Input [8]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#37, year_total#38] + +(40) ReusedExchange [Reuses operator id: 30] +Output [5]: [c_customer_id#39, c_first_name#40, c_last_name#41, d_year#42, sum#43] + +(41) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#39, c_first_name#40, c_last_name#41, d_year#42, sum#43] +Keys [4]: [c_customer_id#39, c_first_name#40, c_last_name#41, d_year#42] +Functions [1]: [sum(UnscaledValue(ws_net_paid#44))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#44))#36] +Results [2]: [c_customer_id#39 AS customer_id#45, MakeDecimal(sum(UnscaledValue(ws_net_paid#44))#36,17,2) AS year_total#46] + +(42) BroadcastExchange +Input [2]: [customer_id#45, year_total#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#45] +Join type: Inner +Join condition: (CASE WHEN (year_total#38 > 0.00) THEN (year_total#46 / year_total#38) END > CASE WHEN (year_total#14 > 0.00) THEN (year_total#29 / year_total#14) END) + +(44) Project [codegen id : 8] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#38, customer_id#45, year_total#46] + +(45) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/simplified.txt new file mode 100644 index 0000000000..20e538e1bd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q74.native_datafusion/simplified.txt @@ -0,0 +1,60 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + WholeStageCodegen (8) + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (1) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (2) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + ReusedExchange [c_customer_id,c_first_name,c_last_name,d_year,sum] #1 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + ReusedExchange [c_customer_id,c_first_name,c_last_name,d_year,sum] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/explain.txt new file mode 100644 index 0000000000..e0339b1928 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/explain.txt @@ -0,0 +1,456 @@ +== Physical Plan == +TakeOrderedAndProject (87) ++- * Project (86) + +- * SortMergeJoin Inner (85) + :- * Sort (45) + : +- Exchange (44) + : +- * Filter (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * ColumnarToRow (39) + : +- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometUnion (35) + : :- CometProject (22) + : : +- CometSortMergeJoin (21) + : : :- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : : +- CometSort (20) + : : +- CometExchange (19) + : : +- CometProject (18) + : : +- CometFilter (17) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (16) + : :- CometProject (28) + : : +- CometSortMergeJoin (27) + : : :- CometSort (24) + : : : +- ReusedExchange (23) + : : +- CometSort (26) + : : +- ReusedExchange (25) + : +- CometProject (34) + : +- CometSortMergeJoin (33) + : :- CometSort (30) + : : +- ReusedExchange (29) + : +- CometSort (32) + : +- ReusedExchange (31) + +- * Sort (84) + +- Exchange (83) + +- * Filter (82) + +- * HashAggregate (81) + +- Exchange (80) + +- * HashAggregate (79) + +- * ColumnarToRow (78) + +- CometHashAggregate (77) + +- CometExchange (76) + +- CometHashAggregate (75) + +- CometUnion (74) + :- CometProject (61) + : +- CometSortMergeJoin (60) + : :- CometSort (57) + : : +- CometExchange (56) + : : +- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometProject (50) + : : : +- CometBroadcastHashJoin (49) + : : : :- CometFilter (47) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (46) + : : : +- ReusedExchange (48) + : : +- CometBroadcastExchange (53) + : : +- CometFilter (52) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (51) + : +- CometSort (59) + : +- ReusedExchange (58) + :- CometProject (67) + : +- CometSortMergeJoin (66) + : :- CometSort (63) + : : +- ReusedExchange (62) + : +- CometSort (65) + : +- ReusedExchange (64) + +- CometProject (73) + +- CometSortMergeJoin (72) + :- CometSort (69) + : +- ReusedExchange (68) + +- CometSort (71) + +- ReusedExchange (70) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(4) CometFilter +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(5) CometProject +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(6) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(7) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Right output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(8) CometProject +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(10) CometFilter +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(12) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right output [2]: [d_date_sk#12, d_year#13] +Arguments: [cs_sold_date_sk#5], [d_date_sk#12], Inner, BuildRight + +(13) CometProject +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] + +(14) CometExchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometSort +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(17) CometFilter +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(18) CometProject +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(19) CometExchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Right output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cs_order_number#2, cs_item_sk#1], [cr_order_number#15, cr_item_sk#14], LeftOuter + +(22) CometProject +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20], [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] + +(23) ReusedExchange [Reuses operator id: 14] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] + +(24) CometSort +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29], [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST] + +(25) ReusedExchange [Reuses operator id: 19] +Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(26) CometSort +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33], [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST] + +(27) CometSortMergeJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] +Right output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [ss_ticket_number#22, ss_item_sk#21], [sr_ticket_number#31, sr_item_sk#30], LeftOuter + +(28) CometProject +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, sales_cnt#34, sales_amt#35], [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, (ss_quantity#23 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#34, (ss_ext_sales_price#24 - coalesce(sr_return_amt#33, 0.00)) AS sales_amt#35] + +(29) ReusedExchange [Reuses operator id: 14] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] + +(30) CometSort +Input [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] +Arguments: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44], [ws_order_number#37 ASC NULLS FIRST, ws_item_sk#36 ASC NULLS FIRST] + +(31) ReusedExchange [Reuses operator id: 19] +Output [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] + +(32) CometSort +Input [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48], [wr_order_number#46 ASC NULLS FIRST, wr_item_sk#45 ASC NULLS FIRST] + +(33) CometSortMergeJoin +Left output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] +Right output [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [ws_order_number#37, ws_item_sk#36], [wr_order_number#46, wr_item_sk#45], LeftOuter + +(34) CometProject +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44, wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, sales_cnt#49, sales_amt#50], [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, (ws_quantity#38 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#49, (ws_ext_sales_price#39 - coalesce(wr_return_amt#48, 0.00)) AS sales_amt#50] + +(35) CometUnion +Child 0 Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, sales_cnt#34, sales_amt#35] +Child 2 Input [7]: [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, sales_cnt#49, sales_amt#50] + +(36) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(37) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(38) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(39) ColumnarToRow [codegen id : 1] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(40) HashAggregate [codegen id : 1] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] + +(41) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(42) HashAggregate [codegen id : 2] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#55, sum(UnscaledValue(sales_amt#20))#56] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(sales_cnt#19)#55 AS sales_cnt#57, MakeDecimal(sum(UnscaledValue(sales_amt#20))#56,18,2) AS sales_amt#58] + +(43) Filter [codegen id : 2] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#57, sales_amt#58] +Condition : isnotnull(sales_cnt#57) + +(44) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#57, sales_amt#58] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(45) Sort [codegen id : 3] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#57, sales_amt#58] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(46) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63] +Arguments: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63] + +(47) CometFilter +Input [5]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63] +Condition : isnotnull(cs_item_sk#59) + +(48) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#64, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] + +(49) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63] +Right output [5]: [i_item_sk#64, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] +Arguments: [cs_item_sk#59], [i_item_sk#64], Inner, BuildRight + +(50) CometProject +Input [10]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63, i_item_sk#64, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] +Arguments: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68], [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] + +(51) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#69, d_year#70] +Arguments: [d_date_sk#69, d_year#70] + +(52) CometFilter +Input [2]: [d_date_sk#69, d_year#70] +Condition : ((isnotnull(d_year#70) AND (d_year#70 = 2001)) AND isnotnull(d_date_sk#69)) + +(53) CometBroadcastExchange +Input [2]: [d_date_sk#69, d_year#70] +Arguments: [d_date_sk#69, d_year#70] + +(54) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] +Right output [2]: [d_date_sk#69, d_year#70] +Arguments: [cs_sold_date_sk#63], [d_date_sk#69], Inner, BuildRight + +(55) CometProject +Input [11]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, cs_sold_date_sk#63, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_date_sk#69, d_year#70] +Arguments: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70], [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70] + +(56) CometExchange +Input [9]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70] +Arguments: hashpartitioning(cs_order_number#60, cs_item_sk#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(57) CometSort +Input [9]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70] +Arguments: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70], [cs_order_number#60 ASC NULLS FIRST, cs_item_sk#59 ASC NULLS FIRST] + +(58) ReusedExchange [Reuses operator id: 19] +Output [4]: [cr_item_sk#71, cr_order_number#72, cr_return_quantity#73, cr_return_amount#74] + +(59) CometSort +Input [4]: [cr_item_sk#71, cr_order_number#72, cr_return_quantity#73, cr_return_amount#74] +Arguments: [cr_item_sk#71, cr_order_number#72, cr_return_quantity#73, cr_return_amount#74], [cr_order_number#72 ASC NULLS FIRST, cr_item_sk#71 ASC NULLS FIRST] + +(60) CometSortMergeJoin +Left output [9]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70] +Right output [4]: [cr_item_sk#71, cr_order_number#72, cr_return_quantity#73, cr_return_amount#74] +Arguments: [cs_order_number#60, cs_item_sk#59], [cr_order_number#72, cr_item_sk#71], LeftOuter + +(61) CometProject +Input [13]: [cs_item_sk#59, cs_order_number#60, cs_quantity#61, cs_ext_sales_price#62, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, d_year#70, cr_item_sk#71, cr_order_number#72, cr_return_quantity#73, cr_return_amount#74] +Arguments: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76], [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, (cs_quantity#61 - coalesce(cr_return_quantity#73, 0)) AS sales_cnt#75, (cs_ext_sales_price#62 - coalesce(cr_return_amount#74, 0.00)) AS sales_amt#76] + +(62) ReusedExchange [Reuses operator id: 56] +Output [9]: [ss_item_sk#77, ss_ticket_number#78, ss_quantity#79, ss_ext_sales_price#80, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, d_year#85] + +(63) CometSort +Input [9]: [ss_item_sk#77, ss_ticket_number#78, ss_quantity#79, ss_ext_sales_price#80, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, d_year#85] +Arguments: [ss_item_sk#77, ss_ticket_number#78, ss_quantity#79, ss_ext_sales_price#80, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, d_year#85], [ss_ticket_number#78 ASC NULLS FIRST, ss_item_sk#77 ASC NULLS FIRST] + +(64) ReusedExchange [Reuses operator id: 19] +Output [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] + +(65) CometSort +Input [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] +Arguments: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89], [sr_ticket_number#87 ASC NULLS FIRST, sr_item_sk#86 ASC NULLS FIRST] + +(66) CometSortMergeJoin +Left output [9]: [ss_item_sk#77, ss_ticket_number#78, ss_quantity#79, ss_ext_sales_price#80, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, d_year#85] +Right output [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] +Arguments: [ss_ticket_number#78, ss_item_sk#77], [sr_ticket_number#87, sr_item_sk#86], LeftOuter + +(67) CometProject +Input [13]: [ss_item_sk#77, ss_ticket_number#78, ss_quantity#79, ss_ext_sales_price#80, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, d_year#85, sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] +Arguments: [d_year#85, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, sales_cnt#90, sales_amt#91], [d_year#85, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, (ss_quantity#79 - coalesce(sr_return_quantity#88, 0)) AS sales_cnt#90, (ss_ext_sales_price#80 - coalesce(sr_return_amt#89, 0.00)) AS sales_amt#91] + +(68) ReusedExchange [Reuses operator id: 56] +Output [9]: [ws_item_sk#92, ws_order_number#93, ws_quantity#94, ws_ext_sales_price#95, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, d_year#100] + +(69) CometSort +Input [9]: [ws_item_sk#92, ws_order_number#93, ws_quantity#94, ws_ext_sales_price#95, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, d_year#100] +Arguments: [ws_item_sk#92, ws_order_number#93, ws_quantity#94, ws_ext_sales_price#95, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, d_year#100], [ws_order_number#93 ASC NULLS FIRST, ws_item_sk#92 ASC NULLS FIRST] + +(70) ReusedExchange [Reuses operator id: 19] +Output [4]: [wr_item_sk#101, wr_order_number#102, wr_return_quantity#103, wr_return_amt#104] + +(71) CometSort +Input [4]: [wr_item_sk#101, wr_order_number#102, wr_return_quantity#103, wr_return_amt#104] +Arguments: [wr_item_sk#101, wr_order_number#102, wr_return_quantity#103, wr_return_amt#104], [wr_order_number#102 ASC NULLS FIRST, wr_item_sk#101 ASC NULLS FIRST] + +(72) CometSortMergeJoin +Left output [9]: [ws_item_sk#92, ws_order_number#93, ws_quantity#94, ws_ext_sales_price#95, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, d_year#100] +Right output [4]: [wr_item_sk#101, wr_order_number#102, wr_return_quantity#103, wr_return_amt#104] +Arguments: [ws_order_number#93, ws_item_sk#92], [wr_order_number#102, wr_item_sk#101], LeftOuter + +(73) CometProject +Input [13]: [ws_item_sk#92, ws_order_number#93, ws_quantity#94, ws_ext_sales_price#95, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, d_year#100, wr_item_sk#101, wr_order_number#102, wr_return_quantity#103, wr_return_amt#104] +Arguments: [d_year#100, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, sales_cnt#105, sales_amt#106], [d_year#100, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, (ws_quantity#94 - coalesce(wr_return_quantity#103, 0)) AS sales_cnt#105, (ws_ext_sales_price#95 - coalesce(wr_return_amt#104, 0.00)) AS sales_amt#106] + +(74) CometUnion +Child 0 Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Child 1 Input [7]: [d_year#85, i_brand_id#81, i_class_id#82, i_category_id#83, i_manufact_id#84, sales_cnt#90, sales_amt#91] +Child 2 Input [7]: [d_year#100, i_brand_id#96, i_class_id#97, i_category_id#98, i_manufact_id#99, sales_cnt#105, sales_amt#106] + +(75) CometHashAggregate +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Keys [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Functions: [] + +(76) CometExchange +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Arguments: hashpartitioning(d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(77) CometHashAggregate +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Keys [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Functions: [] + +(78) ColumnarToRow [codegen id : 4] +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] + +(79) HashAggregate [codegen id : 4] +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#75, sales_amt#76] +Keys [5]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] +Functions [2]: [partial_sum(sales_cnt#75), partial_sum(UnscaledValue(sales_amt#76))] +Aggregate Attributes [2]: [sum#107, sum#108] +Results [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sum#109, sum#110] + +(80) Exchange +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sum#109, sum#110] +Arguments: hashpartitioning(d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(81) HashAggregate [codegen id : 5] +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sum#109, sum#110] +Keys [5]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] +Functions [2]: [sum(sales_cnt#75), sum(UnscaledValue(sales_amt#76))] +Aggregate Attributes [2]: [sum(sales_cnt#75)#55, sum(UnscaledValue(sales_amt#76))#56] +Results [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sum(sales_cnt#75)#55 AS sales_cnt#111, MakeDecimal(sum(UnscaledValue(sales_amt#76))#56,18,2) AS sales_amt#112] + +(82) Filter [codegen id : 5] +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#111, sales_amt#112] +Condition : isnotnull(sales_cnt#111) + +(83) Exchange +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#111, sales_amt#112] +Arguments: hashpartitioning(i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(84) Sort [codegen id : 6] +Input [7]: [d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#111, sales_amt#112] +Arguments: [i_brand_id#65 ASC NULLS FIRST, i_class_id#66 ASC NULLS FIRST, i_category_id#67 ASC NULLS FIRST, i_manufact_id#68 ASC NULLS FIRST], false, 0 + +(85) SortMergeJoin [codegen id : 7] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68] +Join type: Inner +Join condition: ((cast(sales_cnt#57 as decimal(17,2)) / cast(sales_cnt#111 as decimal(17,2))) < 0.90000000000000000000) + +(86) Project [codegen id : 7] +Output [10]: [d_year#70 AS prev_year#113, d_year#13 AS year#114, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#111 AS prev_yr_cnt#115, sales_cnt#57 AS curr_yr_cnt#116, (sales_cnt#57 - sales_cnt#111) AS sales_cnt_diff#117, (sales_amt#58 - sales_amt#112) AS sales_amt_diff#118] +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#57, sales_amt#58, d_year#70, i_brand_id#65, i_class_id#66, i_category_id#67, i_manufact_id#68, sales_cnt#111, sales_amt#112] + +(87) TakeOrderedAndProject +Input [10]: [prev_year#113, year#114, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#115, curr_yr_cnt#116, sales_cnt_diff#117, sales_amt_diff#118] +Arguments: 100, [sales_cnt_diff#117 ASC NULLS FIRST], [prev_year#113, year#114, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#115, curr_yr_cnt#116, sales_cnt_diff#117, sales_amt_diff#118] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2d5d184d6e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q75.native_datafusion/simplified.txt @@ -0,0 +1,102 @@ +TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] + WholeStageCodegen (7) + Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (3) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + WholeStageCodegen (2) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + WholeStageCodegen (1) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #4 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometProject [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometExchange [cr_order_number,cr_item_sk] #7 + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #4 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #4 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 + InputAdapter + WholeStageCodegen (6) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #8 + WholeStageCodegen (5) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + WholeStageCodegen (4) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #11 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometBroadcastExchange [d_date_sk,d_year] #12 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #11 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #11 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/explain.txt new file mode 100644 index 0000000000..bbead60635 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/explain.txt @@ -0,0 +1,181 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * ColumnarToRow (30) + +- CometUnion (29) + :- CometProject (12) + : +- CometBroadcastHashJoin (11) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : +- CometBroadcastExchange (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + :- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : +- ReusedExchange (15) + : +- ReusedExchange (18) + +- CometProject (28) + +- CometBroadcastHashJoin (27) + :- CometProject (25) + : +- CometBroadcastHashJoin (24) + : :- CometFilter (22) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (21) + : +- ReusedExchange (23) + +- ReusedExchange (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#5, i_category#6] +Arguments: [i_item_sk#5, i_category#6] + +(4) CometFilter +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: [i_item_sk#5, i_category#6] + +(6) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Right output [2]: [i_item_sk#5, i_category#6] +Arguments: [ss_item_sk#1], [i_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_item_sk#5, i_category#6] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6], [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [d_date_sk#7, d_year#8, d_qoy#9] + +(9) CometFilter +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : isnotnull(d_date_sk#7) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [d_date_sk#7, d_year#8, d_qoy#9] + +(11) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] +Right output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [ss_sold_date_sk#4], [d_date_sk#7], Inner, BuildRight + +(12) CometProject +Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#7, d_year#8, d_qoy#9] +Arguments: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12], [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#8, d_qoy#9, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#12] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Arguments: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] + +(14) CometFilter +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) + +(15) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#17, i_category#18] + +(16) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Right output [2]: [i_item_sk#17, i_category#18] +Arguments: [ws_item_sk#13], [i_item_sk#17], Inner, BuildRight + +(17) CometProject +Input [6]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_item_sk#17, i_category#18] +Arguments: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18], [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] + +(18) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#19, d_year#20, d_qoy#21] + +(19) CometBroadcastHashJoin +Left output [4]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] +Right output [3]: [d_date_sk#19, d_year#20, d_qoy#21] +Arguments: [ws_sold_date_sk#16], [d_date_sk#19], Inner, BuildRight + +(20) CometProject +Input [7]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18, d_date_sk#19, d_year#20, d_qoy#21] +Arguments: [channel#22, col_name#23, d_year#20, d_qoy#21, i_category#18, ext_sales_price#24], [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#20, d_qoy#21, i_category#18, ws_ext_sales_price#15 AS ext_sales_price#24] + +(21) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Arguments: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] + +(22) CometFilter +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) + +(23) ReusedExchange [Reuses operator id: 5] +Output [2]: [i_item_sk#29, i_category#30] + +(24) CometBroadcastHashJoin +Left output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Right output [2]: [i_item_sk#29, i_category#30] +Arguments: [cs_item_sk#26], [i_item_sk#29], Inner, BuildRight + +(25) CometProject +Input [6]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, i_item_sk#29, i_category#30] +Arguments: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30], [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] + +(26) ReusedExchange [Reuses operator id: 10] +Output [3]: [d_date_sk#31, d_year#32, d_qoy#33] + +(27) CometBroadcastHashJoin +Left output [4]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] +Right output [3]: [d_date_sk#31, d_year#32, d_qoy#33] +Arguments: [cs_sold_date_sk#28], [d_date_sk#31], Inner, BuildRight + +(28) CometProject +Input [7]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30, d_date_sk#31, d_year#32, d_qoy#33] +Arguments: [channel#34, col_name#35, d_year#32, d_qoy#33, i_category#30, ext_sales_price#36], [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#32, d_qoy#33, i_category#30, cs_ext_sales_price#27 AS ext_sales_price#36] + +(29) CometUnion +Child 0 Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Child 1 Input [6]: [channel#22, col_name#23, d_year#20, d_qoy#21, i_category#18, ext_sales_price#24] +Child 2 Input [6]: [channel#34, col_name#35, d_year#32, d_qoy#33, i_category#30, ext_sales_price#36] + +(30) ColumnarToRow [codegen id : 1] +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] + +(31) HashAggregate [codegen id : 1] +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count#37, sum#38] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] + +(32) Exchange +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(33) HashAggregate [codegen id : 2] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count(1)#41, sum(UnscaledValue(ext_sales_price#12))#42] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count(1)#41 AS sales_cnt#43, MakeDecimal(sum(UnscaledValue(ext_sales_price#12))#42,17,2) AS sales_amt#44] + +(34) TakeOrderedAndProject +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] +Arguments: 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#9 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b3603c6017 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q76.native_datafusion/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] + WholeStageCodegen (2) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum] [count(1),sum(UnscaledValue(ext_sales_price)),sales_cnt,sales_amt,count,sum] + InputAdapter + Exchange [channel,col_name,d_year,d_qoy,i_category] #1 + WholeStageCodegen (1) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] [count,sum,count,sum] + ColumnarToRow + InputAdapter + CometUnion [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometProject [ss_store_sk,ss_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_category] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_category] #2 + CometFilter [i_item_sk,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_category] + CometBroadcastExchange [d_date_sk,d_year,d_qoy] #3 + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometProject [ws_ship_customer_sk,ws_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_category] + CometFilter [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + CometProject [cs_ship_addr_sk,cs_ext_sales_price] [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category,d_date_sk,d_year,d_qoy] + CometProject [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category] + CometBroadcastHashJoin [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_category] + CometFilter [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/explain.txt new file mode 100644 index 0000000000..f12c117aba --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/explain.txt @@ -0,0 +1,291 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Expand (48) + +- Union (47) + :- * Project (22) + : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : :- * HashAggregate (17) + : : +- Exchange (16) + : : +- * HashAggregate (15) + : : +- * ColumnarToRow (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- BroadcastExchange (20) + : +- * HashAggregate (19) + : +- ReusedExchange (18) + :- * Project (41) + : +- * BroadcastNestedLoopJoin Inner BuildLeft (40) + : :- BroadcastExchange (31) + : : +- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * ColumnarToRow (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (23) + : : +- ReusedExchange (24) + : +- * HashAggregate (39) + : +- Exchange (38) + : +- * HashAggregate (37) + : +- * ColumnarToRow (36) + : +- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (32) + : +- ReusedExchange (33) + +- * Project (46) + +- * BroadcastHashJoin LeftOuter BuildRight (45) + :- * HashAggregate (43) + : +- ReusedExchange (42) + +- ReusedExchange (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5, d_date#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 2000-08-03)) AND (d_date#6 <= 2000-09-02)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3], [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(10) CometFilter +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(11) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(12) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Arguments: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7], [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(14) ColumnarToRow [codegen id : 1] +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(15) HashAggregate [codegen id : 1] +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] + +(16) Exchange +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(17) HashAggregate [codegen id : 4] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] + +(18) ReusedExchange [Reuses operator id: 16] +Output [3]: [s_store_sk#16, sum#17, sum#18] + +(19) HashAggregate [codegen id : 3] +Input [3]: [s_store_sk#16, sum#17, sum#18] +Keys [1]: [s_store_sk#16] +Functions [2]: [sum(UnscaledValue(sr_return_amt#19)), sum(UnscaledValue(sr_net_loss#20))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#19))#21, sum(UnscaledValue(sr_net_loss#20))#22] +Results [3]: [s_store_sk#16, MakeDecimal(sum(UnscaledValue(sr_return_amt#19))#21,17,2) AS returns#23, MakeDecimal(sum(UnscaledValue(sr_net_loss#20))#22,17,2) AS profit_loss#24] + +(20) BroadcastExchange +Input [3]: [s_store_sk#16, returns#23, profit_loss#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [s_store_sk#7] +Right keys [1]: [s_store_sk#16] +Join type: LeftOuter +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [sales#14, coalesce(returns#23, 0.00) AS returns#25, (profit#15 - coalesce(profit_loss#24, 0.00)) AS profit#26, store channel AS channel#27, s_store_sk#7 AS id#28] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#16, returns#23, profit_loss#24] + +(23) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31, cs_sold_date_sk#32] +Arguments: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31, cs_sold_date_sk#32] + +(24) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#33] + +(25) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31, cs_sold_date_sk#32] +Right output [1]: [d_date_sk#33] +Arguments: [cs_sold_date_sk#32], [d_date_sk#33], Inner, BuildRight + +(26) CometProject +Input [5]: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31, cs_sold_date_sk#32, d_date_sk#33] +Arguments: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31], [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31] + +(27) ColumnarToRow [codegen id : 5] +Input [3]: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31] + +(28) HashAggregate [codegen id : 5] +Input [3]: [cs_call_center_sk#29, cs_ext_sales_price#30, cs_net_profit#31] +Keys [1]: [cs_call_center_sk#29] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#30)), partial_sum(UnscaledValue(cs_net_profit#31))] +Aggregate Attributes [2]: [sum#34, sum#35] +Results [3]: [cs_call_center_sk#29, sum#36, sum#37] + +(29) Exchange +Input [3]: [cs_call_center_sk#29, sum#36, sum#37] +Arguments: hashpartitioning(cs_call_center_sk#29, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(30) HashAggregate [codegen id : 6] +Input [3]: [cs_call_center_sk#29, sum#36, sum#37] +Keys [1]: [cs_call_center_sk#29] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#30)), sum(UnscaledValue(cs_net_profit#31))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#30))#38, sum(UnscaledValue(cs_net_profit#31))#39] +Results [3]: [cs_call_center_sk#29, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#30))#38,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(cs_net_profit#31))#39,17,2) AS profit#41] + +(31) BroadcastExchange +Input [3]: [cs_call_center_sk#29, sales#40, profit#41] +Arguments: IdentityBroadcastMode, [plan_id=4] + +(32) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [3]: [cr_return_amount#42, cr_net_loss#43, cr_returned_date_sk#44] +Arguments: [cr_return_amount#42, cr_net_loss#43, cr_returned_date_sk#44] + +(33) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#45] + +(34) CometBroadcastHashJoin +Left output [3]: [cr_return_amount#42, cr_net_loss#43, cr_returned_date_sk#44] +Right output [1]: [d_date_sk#45] +Arguments: [cr_returned_date_sk#44], [d_date_sk#45], Inner, BuildRight + +(35) CometProject +Input [4]: [cr_return_amount#42, cr_net_loss#43, cr_returned_date_sk#44, d_date_sk#45] +Arguments: [cr_return_amount#42, cr_net_loss#43], [cr_return_amount#42, cr_net_loss#43] + +(36) ColumnarToRow [codegen id : 7] +Input [2]: [cr_return_amount#42, cr_net_loss#43] + +(37) HashAggregate [codegen id : 7] +Input [2]: [cr_return_amount#42, cr_net_loss#43] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#42)), partial_sum(UnscaledValue(cr_net_loss#43))] +Aggregate Attributes [2]: [sum#46, sum#47] +Results [2]: [sum#48, sum#49] + +(38) Exchange +Input [2]: [sum#48, sum#49] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate +Input [2]: [sum#48, sum#49] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#42)), sum(UnscaledValue(cr_net_loss#43))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#42))#50, sum(UnscaledValue(cr_net_loss#43))#51] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#42))#50,17,2) AS returns#52, MakeDecimal(sum(UnscaledValue(cr_net_loss#43))#51,17,2) AS profit_loss#53] + +(40) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 8] +Output [5]: [sales#40, returns#52, (profit#41 - profit_loss#53) AS profit#54, catalog channel AS channel#55, cs_call_center_sk#29 AS id#56] +Input [5]: [cs_call_center_sk#29, sales#40, profit#41, returns#52, profit_loss#53] + +(42) ReusedExchange [Reuses operator id: 16] +Output [3]: [wp_web_page_sk#57, sum#58, sum#59] + +(43) HashAggregate [codegen id : 12] +Input [3]: [wp_web_page_sk#57, sum#58, sum#59] +Keys [1]: [wp_web_page_sk#57] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#60)), sum(UnscaledValue(ws_net_profit#61))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#60))#62, sum(UnscaledValue(ws_net_profit#61))#63] +Results [3]: [wp_web_page_sk#57, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#60))#62,17,2) AS sales#64, MakeDecimal(sum(UnscaledValue(ws_net_profit#61))#63,17,2) AS profit#65] + +(44) ReusedExchange [Reuses operator id: 20] +Output [3]: [wp_web_page_sk#66, returns#67, profit_loss#68] + +(45) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [wp_web_page_sk#57] +Right keys [1]: [wp_web_page_sk#66] +Join type: LeftOuter +Join condition: None + +(46) Project [codegen id : 12] +Output [5]: [sales#64, coalesce(returns#67, 0.00) AS returns#69, (profit#65 - coalesce(profit_loss#68, 0.00)) AS profit#70, web channel AS channel#71, wp_web_page_sk#57 AS id#72] +Input [6]: [wp_web_page_sk#57, sales#64, profit#65, wp_web_page_sk#66, returns#67, profit_loss#68] + +(47) Union + +(48) Expand [codegen id : 13] +Input [5]: [sales#14, returns#25, profit#26, channel#27, id#28] +Arguments: [[sales#14, returns#25, profit#26, channel#27, id#28, 0], [sales#14, returns#25, profit#26, channel#27, null, 1], [sales#14, returns#25, profit#26, null, null, 3]], [sales#14, returns#25, profit#26, channel#73, id#74, spark_grouping_id#75] + +(49) HashAggregate [codegen id : 13] +Input [6]: [sales#14, returns#25, profit#26, channel#73, id#74, spark_grouping_id#75] +Keys [3]: [channel#73, id#74, spark_grouping_id#75] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#25), partial_sum(profit#26)] +Aggregate Attributes [6]: [sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] +Results [9]: [channel#73, id#74, spark_grouping_id#75, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87] + +(50) Exchange +Input [9]: [channel#73, id#74, spark_grouping_id#75, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87] +Arguments: hashpartitioning(channel#73, id#74, spark_grouping_id#75, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(51) HashAggregate [codegen id : 14] +Input [9]: [channel#73, id#74, spark_grouping_id#75, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87] +Keys [3]: [channel#73, id#74, spark_grouping_id#75] +Functions [3]: [sum(sales#14), sum(returns#25), sum(profit#26)] +Aggregate Attributes [3]: [sum(sales#14)#88, sum(returns#25)#89, sum(profit#26)#90] +Results [5]: [channel#73, id#74, sum(sales#14)#88 AS sales#91, sum(returns#25)#89 AS returns#92, sum(profit#26)#90 AS profit#93] + +(52) TakeOrderedAndProject +Input [5]: [channel#73, id#74, sales#91, returns#92, profit#93] +Arguments: 100, [channel#73 ASC NULLS FIRST, id#74 ASC NULLS FIRST], [channel#73, id#74, sales#91, returns#92, profit#93] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/simplified.txt new file mode 100644 index 0000000000..65c1a5d6b9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q77.native_datafusion/simplified.txt @@ -0,0 +1,75 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (14) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (13) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (4) + Project [sales,returns,profit,profit_loss,s_store_sk] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen (1) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,ss_net_profit,s_store_sk] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk] #4 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + ReusedExchange [s_store_sk,sum,sum] #2 + WholeStageCodegen (8) + Project [sales,returns,profit,profit_loss,cs_call_center_sk] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #7 + WholeStageCodegen (5) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #8 + WholeStageCodegen (7) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cr_return_amount,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (12) + Project [sales,returns,profit,profit_loss,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + ReusedExchange [wp_web_page_sk,sum,sum] #2 + InputAdapter + ReusedExchange [wp_web_page_sk,returns,profit_loss] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/explain.txt new file mode 100644 index 0000000000..0c93eeff8b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- * Project (48) + +- * SortMergeJoin Inner (47) + :- * Project (28) + : +- * SortMergeJoin Inner (27) + : :- * Sort (22) + : : +- * HashAggregate (21) + : : +- Exchange (20) + : : +- * HashAggregate (19) + : : +- * ColumnarToRow (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometFilter (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : +- * Sort (26) + : +- * Filter (25) + : +- * HashAggregate (24) + : +- ReusedExchange (23) + +- * Sort (46) + +- * Filter (45) + +- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- * ColumnarToRow (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (37) + : +- CometFilter (36) + : +- CometSortMergeJoin (35) + : :- CometSort (32) + : : +- CometExchange (31) + : : +- CometFilter (30) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (29) + : +- CometSort (34) + : +- ReusedExchange (33) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(6) CometFilter +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) CometProject +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_item_sk#8, sr_ticket_number#9] + +(8) CometExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_ticket_number#3, ss_item_sk#1], [sr_ticket_number#9, sr_item_sk#8], LeftOuter + +(11) CometFilter +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(12) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(14) CometFilter +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(15) CometBroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(16) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#11, d_year#12] +Arguments: [ss_sold_date_sk#7], [d_date_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] + +(18) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] + +(19) HashAggregate [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum#13, sum#14, sum#15] +Results [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] + +(20) Exchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 2] +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum(ss_quantity#4)#19, sum(UnscaledValue(ss_wholesale_cost#5))#20, sum(UnscaledValue(ss_sales_price#6))#21] +Results [6]: [d_year#12 AS ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#19 AS ss_qty#23, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#20,17,2) AS ss_wc#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#21,17,2) AS ss_sp#25] + +(22) Sort [codegen id : 2] +Input [6]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(23) ReusedExchange [Reuses operator id: 20] +Output [6]: [d_year#26, ws_item_sk#27, ws_bill_customer_sk#28, sum#29, sum#30, sum#31] + +(24) HashAggregate [codegen id : 4] +Input [6]: [d_year#26, ws_item_sk#27, ws_bill_customer_sk#28, sum#29, sum#30, sum#31] +Keys [3]: [d_year#26, ws_item_sk#27, ws_bill_customer_sk#28] +Functions [3]: [sum(ws_quantity#32), sum(UnscaledValue(ws_wholesale_cost#33)), sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum(ws_quantity#32)#35, sum(UnscaledValue(ws_wholesale_cost#33))#36, sum(UnscaledValue(ws_sales_price#34))#37] +Results [6]: [d_year#26 AS ws_sold_year#38, ws_item_sk#27, ws_bill_customer_sk#28 AS ws_customer_sk#39, sum(ws_quantity#32)#35 AS ws_qty#40, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#33))#36,17,2) AS ws_wc#41, MakeDecimal(sum(UnscaledValue(ws_sales_price#34))#37,17,2) AS ws_sp#42] + +(25) Filter [codegen id : 4] +Input [6]: [ws_sold_year#38, ws_item_sk#27, ws_customer_sk#39, ws_qty#40, ws_wc#41, ws_sp#42] +Condition : (coalesce(ws_qty#40, 0) > 0) + +(26) Sort [codegen id : 4] +Input [6]: [ws_sold_year#38, ws_item_sk#27, ws_customer_sk#39, ws_qty#40, ws_wc#41, ws_sp#42] +Arguments: [ws_sold_year#38 ASC NULLS FIRST, ws_item_sk#27 ASC NULLS FIRST, ws_customer_sk#39 ASC NULLS FIRST], false, 0 + +(27) SortMergeJoin [codegen id : 5] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#38, ws_item_sk#27, ws_customer_sk#39] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [9]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#40, ws_wc#41, ws_sp#42] +Input [12]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_sold_year#38, ws_item_sk#27, ws_customer_sk#39, ws_qty#40, ws_wc#41, ws_sp#42] + +(29) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [7]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] +Arguments: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] + +(30) CometFilter +Input [7]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] +Condition : (isnotnull(cs_item_sk#44) AND isnotnull(cs_bill_customer_sk#43)) + +(31) CometExchange +Input [7]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] +Arguments: hashpartitioning(cs_order_number#45, cs_item_sk#44, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(32) CometSort +Input [7]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] +Arguments: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49], [cs_order_number#45 ASC NULLS FIRST, cs_item_sk#44 ASC NULLS FIRST] + +(33) ReusedExchange [Reuses operator id: 8] +Output [2]: [cr_item_sk#50, cr_order_number#51] + +(34) CometSort +Input [2]: [cr_item_sk#50, cr_order_number#51] +Arguments: [cr_item_sk#50, cr_order_number#51], [cr_order_number#51 ASC NULLS FIRST, cr_item_sk#50 ASC NULLS FIRST] + +(35) CometSortMergeJoin +Left output [7]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] +Right output [2]: [cr_item_sk#50, cr_order_number#51] +Arguments: [cs_order_number#45, cs_item_sk#44], [cr_order_number#51, cr_item_sk#50], LeftOuter + +(36) CometFilter +Input [9]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49, cr_item_sk#50, cr_order_number#51] +Condition : isnull(cr_order_number#51) + +(37) CometProject +Input [9]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_order_number#45, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49, cr_item_sk#50, cr_order_number#51] +Arguments: [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49], [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] + +(38) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#52, d_year#53] + +(39) CometBroadcastHashJoin +Left output [6]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49] +Right output [2]: [d_date_sk#52, d_year#53] +Arguments: [cs_sold_date_sk#49], [d_date_sk#52], Inner, BuildRight + +(40) CometProject +Input [8]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, cs_sold_date_sk#49, d_date_sk#52, d_year#53] +Arguments: [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, d_year#53], [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, d_year#53] + +(41) ColumnarToRow [codegen id : 6] +Input [6]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, d_year#53] + +(42) HashAggregate [codegen id : 6] +Input [6]: [cs_bill_customer_sk#43, cs_item_sk#44, cs_quantity#46, cs_wholesale_cost#47, cs_sales_price#48, d_year#53] +Keys [3]: [d_year#53, cs_item_sk#44, cs_bill_customer_sk#43] +Functions [3]: [partial_sum(cs_quantity#46), partial_sum(UnscaledValue(cs_wholesale_cost#47)), partial_sum(UnscaledValue(cs_sales_price#48))] +Aggregate Attributes [3]: [sum#54, sum#55, sum#56] +Results [6]: [d_year#53, cs_item_sk#44, cs_bill_customer_sk#43, sum#57, sum#58, sum#59] + +(43) Exchange +Input [6]: [d_year#53, cs_item_sk#44, cs_bill_customer_sk#43, sum#57, sum#58, sum#59] +Arguments: hashpartitioning(d_year#53, cs_item_sk#44, cs_bill_customer_sk#43, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(44) HashAggregate [codegen id : 7] +Input [6]: [d_year#53, cs_item_sk#44, cs_bill_customer_sk#43, sum#57, sum#58, sum#59] +Keys [3]: [d_year#53, cs_item_sk#44, cs_bill_customer_sk#43] +Functions [3]: [sum(cs_quantity#46), sum(UnscaledValue(cs_wholesale_cost#47)), sum(UnscaledValue(cs_sales_price#48))] +Aggregate Attributes [3]: [sum(cs_quantity#46)#60, sum(UnscaledValue(cs_wholesale_cost#47))#61, sum(UnscaledValue(cs_sales_price#48))#62] +Results [6]: [d_year#53 AS cs_sold_year#63, cs_item_sk#44, cs_bill_customer_sk#43 AS cs_customer_sk#64, sum(cs_quantity#46)#60 AS cs_qty#65, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#47))#61,17,2) AS cs_wc#66, MakeDecimal(sum(UnscaledValue(cs_sales_price#48))#62,17,2) AS cs_sp#67] + +(45) Filter [codegen id : 7] +Input [6]: [cs_sold_year#63, cs_item_sk#44, cs_customer_sk#64, cs_qty#65, cs_wc#66, cs_sp#67] +Condition : (coalesce(cs_qty#65, 0) > 0) + +(46) Sort [codegen id : 7] +Input [6]: [cs_sold_year#63, cs_item_sk#44, cs_customer_sk#64, cs_qty#65, cs_wc#66, cs_sp#67] +Arguments: [cs_sold_year#63 ASC NULLS FIRST, cs_item_sk#44 ASC NULLS FIRST, cs_customer_sk#64 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 8] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#63, cs_item_sk#44, cs_customer_sk#64] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 8] +Output [12]: [round((cast(ss_qty#23 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(coalesce((ws_qty#40 + cs_qty#65), 1) as double)))), 2) AS ratio#68, ss_qty#23 AS store_qty#69, ss_wc#24 AS store_wholesale_cost#70, ss_sp#25 AS store_sales_price#71, (coalesce(ws_qty#40, 0) + coalesce(cs_qty#65, 0)) AS other_chan_qty#72, (coalesce(ws_wc#41, 0.00) + coalesce(cs_wc#66, 0.00)) AS other_chan_wholesale_cost#73, (coalesce(ws_sp#42, 0.00) + coalesce(cs_sp#67, 0.00)) AS other_chan_sales_price#74, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#40, cs_qty#65] +Input [15]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#40, ws_wc#41, ws_sp#42, cs_sold_year#63, cs_item_sk#44, cs_customer_sk#64, cs_qty#65, cs_wc#66, cs_sp#67] + +(49) TakeOrderedAndProject +Input [12]: [ratio#68, store_qty#69, store_wholesale_cost#70, store_sales_price#71, other_chan_qty#72, other_chan_wholesale_cost#73, other_chan_sales_price#74, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#40, cs_qty#65] +Arguments: 100, [ratio#68 ASC NULLS FIRST, ss_qty#23 DESC NULLS LAST, ss_wc#24 DESC NULLS LAST, ss_sp#25 DESC NULLS LAST, other_chan_qty#72 ASC NULLS FIRST, other_chan_wholesale_cost#73 ASC NULLS FIRST, other_chan_sales_price#74 ASC NULLS FIRST, round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#40 + cs_qty#65), 1) as double)), 2) ASC NULLS FIRST], [ratio#68, store_qty#69, store_wholesale_cost#70, store_sales_price#71, other_chan_qty#72, other_chan_wholesale_cost#73, other_chan_sales_price#74] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/simplified.txt new file mode 100644 index 0000000000..152634bac1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q78.native_datafusion/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (8) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] + InputAdapter + WholeStageCodegen (5) + Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_sold_year,ss_item_sk,ss_customer_sk] + HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen (1) + HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometExchange [ss_ticket_number,ss_item_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #3 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #4 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (4) + Sort [ws_sold_year,ws_item_sk,ws_customer_sk] + Filter [ws_qty] + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] + InputAdapter + ReusedExchange [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] #1 + InputAdapter + WholeStageCodegen (7) + Sort [cs_sold_year,cs_item_sk,cs_customer_sk] + Filter [cs_qty] + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #5 + WholeStageCodegen (6) + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSortMergeJoin [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSort [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #6 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number] + ReusedExchange [cr_item_sk,cr_order_number] #3 + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/explain.txt new file mode 100644 index 0000000000..7866af00cd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/explain.txt @@ -0,0 +1,167 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * ColumnarToRow (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- BroadcastExchange (28) + +- * ColumnarToRow (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9, d_year#10, d_dow#11] + +(4) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(5) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Arguments: [s_store_sk#12, s_number_employees#13, s_city#14] + +(10) CometFilter +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Condition : (((isnotnull(s_number_employees#13) AND (s_number_employees#13 >= 200)) AND (s_number_employees#13 <= 295)) AND isnotnull(s_store_sk#12)) + +(11) CometProject +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Arguments: [s_store_sk#12, s_city#14], [s_store_sk#12, s_city#14] + +(12) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_city#14] +Arguments: [s_store_sk#12, s_city#14] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Right output [2]: [s_store_sk#12, s_city#14] +Arguments: [ss_store_sk#4], [s_store_sk#12], Inner, BuildRight + +(14) CometProject +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12, s_city#14] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14], [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(16) CometFilter +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 6) OR (hd_vehicle_count#17 > 2)) AND isnotnull(hd_demo_sk#15)) + +(17) CometProject +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Arguments: [hd_demo_sk#15], [hd_demo_sk#15] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: [hd_demo_sk#15] + +(19) CometBroadcastHashJoin +Left output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Right output [1]: [hd_demo_sk#15] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#15], Inner, BuildRight + +(20) CometProject +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14, hd_demo_sk#15] +Arguments: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14], [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(21) ColumnarToRow [codegen id : 1] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] + +(22) HashAggregate [codegen id : 1] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum#18, sum#19] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] + +(23) Exchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(24) HashAggregate [codegen id : 3] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#22, sum(UnscaledValue(ss_net_profit#7))#23] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#22,17,2) AS amt#24, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#23,17,2) AS profit#25] + +(25) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Arguments: [c_customer_sk#26, c_first_name#27, c_last_name#28] + +(26) CometFilter +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Condition : isnotnull(c_customer_sk#26) + +(27) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] + +(28) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(29) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#26] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 3] +Output [7]: [c_last_name#28, c_first_name#27, substr(s_city#14, 1, 30) AS substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25, s_city#14] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#24, profit#25, c_customer_sk#26, c_first_name#27, c_last_name#28] + +(31) TakeOrderedAndProject +Input [7]: [c_last_name#28, c_first_name#27, substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25, s_city#14] +Arguments: 100, [c_last_name#28 ASC NULLS FIRST, c_first_name#27 ASC NULLS FIRST, substr(s_city#14, 1, 30) ASC NULLS FIRST, profit#25 ASC NULLS FIRST], [c_last_name#28, c_first_name#27, substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2f53027b64 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q79.native_datafusion/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [c_last_name,c_first_name,s_city,profit,substr(s_city, 1, 30),ss_ticket_number,amt] + WholeStageCodegen (3) + Project [c_last_name,c_first_name,s_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),amt,profit,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + WholeStageCodegen (1) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_store_sk,s_city] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dow] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dow] + CometBroadcastExchange [s_store_sk,s_city] #3 + CometProject [s_store_sk,s_city] + CometFilter [s_store_sk,s_number_employees,s_city] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_number_employees,s_city] + CometBroadcastExchange [hd_demo_sk] #4 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/explain.txt new file mode 100644 index 0000000000..d4600251d2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/explain.txt @@ -0,0 +1,227 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * ColumnarToRow (38) + +- CometProject (37) + +- CometBroadcastHashJoin (36) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + +- CometBroadcastExchange (35) + +- CometHashAggregate (34) + +- CometExchange (33) + +- CometHashAggregate (32) + +- CometBroadcastHashJoin (31) + :- CometProject (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (14) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometFilter (28) + +- CometHashAggregate (27) + +- CometExchange (26) + +- CometHashAggregate (25) + +- CometProject (24) + +- CometBroadcastHashJoin (23) + :- CometFilter (18) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (17) + +- CometBroadcastExchange (22) + +- CometProject (21) + +- CometFilter (20) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (19) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4, d_year#5, d_qoy#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [s_store_sk#7, s_store_name#8, s_zip#9] + +(10) CometFilter +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Condition : (isnotnull(s_store_sk#7) AND isnotnull(s_zip#9)) + +(11) CometBroadcastExchange +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [s_store_sk#7, s_store_name#8, s_zip#9] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_store_sk#1, ss_net_profit#2] +Right output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: [ss_net_profit#2, s_store_name#8, s_zip#9], [ss_net_profit#2, s_store_name#8, s_zip#9] + +(14) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [1]: [ca_zip#10] +Arguments: [ca_zip#10] + +(15) CometFilter +Input [1]: [ca_zip#10] +Condition : (substr(ca_zip#10, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#10, 1, 5))) + +(16) CometProject +Input [1]: [ca_zip#10] +Arguments: [ca_zip#11], [substr(ca_zip#10, 1, 5) AS ca_zip#11] + +(17) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#12, ca_zip#13] +Arguments: [ca_address_sk#12, ca_zip#13] + +(18) CometFilter +Input [2]: [ca_address_sk#12, ca_zip#13] +Condition : isnotnull(ca_address_sk#12) + +(19) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Arguments: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(20) CometFilter +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(21) CometProject +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Arguments: [c_current_addr_sk#14], [c_current_addr_sk#14] + +(22) CometBroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: [c_current_addr_sk#14] + +(23) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#12, ca_zip#13] +Right output [1]: [c_current_addr_sk#14] +Arguments: [ca_address_sk#12], [c_current_addr_sk#14], Inner, BuildRight + +(24) CometProject +Input [3]: [ca_address_sk#12, ca_zip#13, c_current_addr_sk#14] +Arguments: [ca_zip#13], [ca_zip#13] + +(25) CometHashAggregate +Input [1]: [ca_zip#13] +Keys [1]: [ca_zip#13] +Functions [1]: [partial_count(1)] + +(26) CometExchange +Input [2]: [ca_zip#13, count#16] +Arguments: hashpartitioning(ca_zip#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [2]: [ca_zip#13, count#16] +Keys [1]: [ca_zip#13] +Functions [1]: [count(1)] + +(28) CometFilter +Input [2]: [ca_zip#17, cnt#18] +Condition : (cnt#18 > 10) + +(29) CometProject +Input [2]: [ca_zip#17, cnt#18] +Arguments: [ca_zip#17], [ca_zip#17] + +(30) CometBroadcastExchange +Input [1]: [ca_zip#17] +Arguments: [ca_zip#17] + +(31) CometBroadcastHashJoin +Left output [1]: [ca_zip#11] +Right output [1]: [ca_zip#17] +Arguments: [coalesce(ca_zip#11, ), isnull(ca_zip#11)], [coalesce(ca_zip#17, ), isnull(ca_zip#17)], LeftSemi, BuildRight + +(32) CometHashAggregate +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] + +(33) CometExchange +Input [1]: [ca_zip#11] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(34) CometHashAggregate +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] + +(35) CometBroadcastExchange +Input [1]: [ca_zip#11] +Arguments: [ca_zip#11] + +(36) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#2, s_store_name#8, s_zip#9] +Right output [1]: [ca_zip#11] +Arguments: [substr(s_zip#9, 1, 2)], [substr(ca_zip#11, 1, 2)], Inner, BuildRight + +(37) CometProject +Input [4]: [ss_net_profit#2, s_store_name#8, s_zip#9, ca_zip#11] +Arguments: [ss_net_profit#2, s_store_name#8], [ss_net_profit#2, s_store_name#8] + +(38) ColumnarToRow [codegen id : 1] +Input [2]: [ss_net_profit#2, s_store_name#8] + +(39) HashAggregate [codegen id : 1] +Input [2]: [ss_net_profit#2, s_store_name#8] +Keys [1]: [s_store_name#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#19] +Results [2]: [s_store_name#8, sum#20] + +(40) Exchange +Input [2]: [s_store_name#8, sum#20] +Arguments: hashpartitioning(s_store_name#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(41) HashAggregate [codegen id : 2] +Input [2]: [s_store_name#8, sum#20] +Keys [1]: [s_store_name#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#21] +Results [2]: [s_store_name#8, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#21,17,2) AS sum(ss_net_profit)#22] + +(42) TakeOrderedAndProject +Input [2]: [s_store_name#8, sum(ss_net_profit)#22] +Arguments: 100, [s_store_name#8 ASC NULLS FIRST], [s_store_name#8, sum(ss_net_profit)#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/simplified.txt new file mode 100644 index 0000000000..31df269cff --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q8.native_datafusion/simplified.txt @@ -0,0 +1,46 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen (2) + HashAggregate [s_store_name,sum] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen (1) + HashAggregate [s_store_name,ss_net_profit] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_net_profit,s_store_name] + CometBroadcastHashJoin [ss_net_profit,s_store_name,s_zip,ca_zip] + CometProject [ss_net_profit,s_store_name,s_zip] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,s_store_sk,s_store_name,s_zip] + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #3 + CometFilter [s_store_sk,s_store_name,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [ca_zip] #4 + CometHashAggregate [ca_zip] + CometExchange [ca_zip] #5 + CometHashAggregate [ca_zip] + CometBroadcastHashJoin [ca_zip,ca_zip] + CometProject [ca_zip] [ca_zip] + CometFilter [ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_zip] + CometBroadcastExchange [ca_zip] #6 + CometProject [ca_zip] + CometFilter [ca_zip,cnt] + CometHashAggregate [ca_zip,cnt,ca_zip,count,count(1)] + CometExchange [ca_zip] #7 + CometHashAggregate [ca_zip,count] + CometProject [ca_zip] + CometBroadcastHashJoin [ca_address_sk,ca_zip,c_current_addr_sk] + CometFilter [ca_address_sk,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_zip] + CometBroadcastExchange [c_current_addr_sk] #8 + CometProject [c_current_addr_sk] + CometFilter [c_current_addr_sk,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_current_addr_sk,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/explain.txt new file mode 100644 index 0000000000..2f3fac41db --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/explain.txt @@ -0,0 +1,374 @@ +== Physical Plan == +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Expand (66) + +- Union (65) + :- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * ColumnarToRow (35) + : +- CometProject (34) + : +- CometBroadcastHashJoin (33) + : :- CometProject (28) + : : +- CometBroadcastHashJoin (27) + : : :- CometProject (22) + : : : +- CometBroadcastHashJoin (21) + : : : :- CometProject (17) + : : : : +- CometBroadcastHashJoin (16) + : : : : :- CometProject (11) + : : : : : +- CometSortMergeJoin (10) + : : : : : :- CometSort (4) + : : : : : : +- CometExchange (3) + : : : : : : +- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : +- CometSort (9) + : : : : : +- CometExchange (8) + : : : : : +- CometProject (7) + : : : : : +- CometFilter (6) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + : : : : +- CometBroadcastExchange (15) + : : : : +- CometProject (14) + : : : : +- CometFilter (13) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (12) + : : : +- CometBroadcastExchange (20) + : : : +- CometFilter (19) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (18) + : : +- CometBroadcastExchange (26) + : : +- CometProject (25) + : : +- CometFilter (24) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (23) + : +- CometBroadcastExchange (32) + : +- CometProject (31) + : +- CometFilter (30) + : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (29) + :- * HashAggregate (62) + : +- Exchange (61) + : +- * HashAggregate (60) + : +- * ColumnarToRow (59) + : +- CometProject (58) + : +- CometBroadcastHashJoin (57) + : :- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometProject (52) + : : : +- CometBroadcastHashJoin (51) + : : : :- CometProject (49) + : : : : +- CometBroadcastHashJoin (48) + : : : : :- CometProject (46) + : : : : : +- CometSortMergeJoin (45) + : : : : : :- CometSort (42) + : : : : : : +- CometExchange (41) + : : : : : : +- CometFilter (40) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (39) + : : : : : +- CometSort (44) + : : : : : +- ReusedExchange (43) + : : : : +- ReusedExchange (47) + : : : +- ReusedExchange (50) + : : +- ReusedExchange (53) + : +- ReusedExchange (56) + +- * HashAggregate (64) + +- ReusedExchange (63) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(6) CometFilter +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) CometProject +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(8) CometExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_ticket_number#4], [sr_item_sk#8, sr_ticket_number#9], LeftOuter + +(11) CometProject +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] + +(12) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13, d_date#14] + +(13) CometFilter +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 2000-08-23)) AND (d_date#14 <= 2000-09-22)) AND isnotnull(d_date_sk#13)) + +(14) CometProject +Input [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(16) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#7], [d_date_sk#13], Inner, BuildRight + +(17) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] + +(18) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(19) CometFilter +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(20) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(21) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Right output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [ss_store_sk#2], [s_store_sk#15], Inner, BuildRight + +(22) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(23) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17, i_current_price#18] + +(24) CometFilter +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(25) CometProject +Input [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17], [i_item_sk#17] + +(26) CometBroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: [i_item_sk#17] + +(27) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [i_item_sk#17] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] +Arguments: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(29) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19, p_channel_tv#20] + +(30) CometFilter +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(31) CometProject +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19], [p_promo_sk#19] + +(32) CometBroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: [p_promo_sk#19] + +(33) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [p_promo_sk#19] +Arguments: [ss_promo_sk#3], [p_promo_sk#19], Inner, BuildRight + +(34) CometProject +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] +Arguments: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(35) ColumnarToRow [codegen id : 1] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(36) HashAggregate [codegen id : 1] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] + +(37) Exchange +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(38) HashAggregate [codegen id : 2] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#34, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32 AS returns#35, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33 AS profit#36, store channel AS channel#37, concat(store, s_store_id#16) AS id#38] + +(39) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(40) CometFilter +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) + +(41) CometExchange +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(42) CometSort +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45], [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST] + +(43) ReusedExchange [Reuses operator id: 8] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] + +(44) CometSort +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49], [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST] + +(45) CometSortMergeJoin +Left output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Right output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cs_item_sk#40, cs_order_number#42], [cr_item_sk#46, cr_order_number#47], LeftOuter + +(46) CometProject +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49], [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] + +(47) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#50] + +(48) CometBroadcastHashJoin +Left output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Right output [1]: [d_date_sk#50] +Arguments: [cs_sold_date_sk#45], [d_date_sk#50], Inner, BuildRight + +(49) CometProject +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#50] +Arguments: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49], [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] + +(50) ReusedExchange [Reuses operator id: 20] +Output [2]: [cp_catalog_page_sk#51, cp_catalog_page_id#52] + +(51) CometBroadcastHashJoin +Left output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Right output [2]: [cp_catalog_page_sk#51, cp_catalog_page_id#52] +Arguments: [cs_catalog_page_sk#39], [cp_catalog_page_sk#51], Inner, BuildRight + +(52) CometProject +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#51, cp_catalog_page_id#52] +Arguments: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52], [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] + +(53) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#53] + +(54) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] +Right output [1]: [i_item_sk#53] +Arguments: [cs_item_sk#40], [i_item_sk#53], Inner, BuildRight + +(55) CometProject +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52, i_item_sk#53] +Arguments: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52], [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] + +(56) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#54] + +(57) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] +Right output [1]: [p_promo_sk#54] +Arguments: [cs_promo_sk#41], [p_promo_sk#54], Inner, BuildRight + +(58) CometProject +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52, p_promo_sk#54] +Arguments: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52], [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] + +(59) ColumnarToRow [codegen id : 3] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] + +(60) HashAggregate [codegen id : 3] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#52] +Keys [1]: [cp_catalog_page_id#52] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Results [6]: [cp_catalog_page_id#52, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] + +(61) Exchange +Input [6]: [cp_catalog_page_id#52, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] +Arguments: hashpartitioning(cp_catalog_page_id#52, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(62) HashAggregate [codegen id : 4] +Input [6]: [cp_catalog_page_id#52, sum#60, sum#61, isEmpty#62, sum#63, isEmpty#64] +Keys [1]: [cp_catalog_page_id#52] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#65, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#66, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#67] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#65,17,2) AS sales#68, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#66 AS returns#69, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#67 AS profit#70, catalog channel AS channel#71, concat(catalog_page, cp_catalog_page_id#52) AS id#72] + +(63) ReusedExchange [Reuses operator id: 37] +Output [6]: [web_site_id#73, sum#74, sum#75, isEmpty#76, sum#77, isEmpty#78] + +(64) HashAggregate [codegen id : 6] +Input [6]: [web_site_id#73, sum#74, sum#75, isEmpty#76, sum#77, isEmpty#78] +Keys [1]: [web_site_id#73] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#79)), sum(coalesce(cast(wr_return_amt#80 as decimal(12,2)), 0.00)), sum((ws_net_profit#81 - coalesce(cast(wr_net_loss#82 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#79))#83, sum(coalesce(cast(wr_return_amt#80 as decimal(12,2)), 0.00))#84, sum((ws_net_profit#81 - coalesce(cast(wr_net_loss#82 as decimal(12,2)), 0.00)))#85] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#79))#83,17,2) AS sales#86, sum(coalesce(cast(wr_return_amt#80 as decimal(12,2)), 0.00))#84 AS returns#87, sum((ws_net_profit#81 - coalesce(cast(wr_net_loss#82 as decimal(12,2)), 0.00)))#85 AS profit#88, web channel AS channel#89, concat(web_site, web_site_id#73) AS id#90] + +(65) Union + +(66) Expand [codegen id : 7] +Input [5]: [sales#34, returns#35, profit#36, channel#37, id#38] +Arguments: [[sales#34, returns#35, profit#36, channel#37, id#38, 0], [sales#34, returns#35, profit#36, channel#37, null, 1], [sales#34, returns#35, profit#36, null, null, 3]], [sales#34, returns#35, profit#36, channel#91, id#92, spark_grouping_id#93] + +(67) HashAggregate [codegen id : 7] +Input [6]: [sales#34, returns#35, profit#36, channel#91, id#92, spark_grouping_id#93] +Keys [3]: [channel#91, id#92, spark_grouping_id#93] +Functions [3]: [partial_sum(sales#34), partial_sum(returns#35), partial_sum(profit#36)] +Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99] +Results [9]: [channel#91, id#92, spark_grouping_id#93, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(68) Exchange +Input [9]: [channel#91, id#92, spark_grouping_id#93, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(channel#91, id#92, spark_grouping_id#93, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(69) HashAggregate [codegen id : 8] +Input [9]: [channel#91, id#92, spark_grouping_id#93, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [3]: [channel#91, id#92, spark_grouping_id#93] +Functions [3]: [sum(sales#34), sum(returns#35), sum(profit#36)] +Aggregate Attributes [3]: [sum(sales#34)#106, sum(returns#35)#107, sum(profit#36)#108] +Results [5]: [channel#91, id#92, sum(sales#34)#106 AS sales#109, sum(returns#35)#107 AS returns#110, sum(profit#36)#108 AS profit#111] + +(70) TakeOrderedAndProject +Input [5]: [channel#91, id#92, sales#109, returns#110, profit#111] +Arguments: 100, [channel#91 ASC NULLS FIRST, id#92 ASC NULLS FIRST], [channel#91, id#92, sales#109, returns#110, profit#111] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/simplified.txt new file mode 100644 index 0000000000..386f58333c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q80.native_datafusion/simplified.txt @@ -0,0 +1,84 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (8) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (2) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (1) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,p_promo_sk] + CometProject [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,i_item_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + CometSortMergeJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometSort [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometExchange [ss_item_sk,ss_ticket_number] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometExchange [sr_item_sk,sr_ticket_number] #4 + CometProject [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #6 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk] #7 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price] + CometBroadcastExchange [p_promo_sk] #8 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_tv] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_tv] + WholeStageCodegen (4) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #9 + WholeStageCodegen (3) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,p_promo_sk] + CometProject [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,i_item_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss,d_date_sk] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + CometSortMergeJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometSort [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometExchange [cs_item_sk,cs_order_number] #10 + CometFilter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] #4 + ReusedExchange [d_date_sk] #5 + ReusedExchange [cp_catalog_page_sk,cp_catalog_page_id] #6 + ReusedExchange [i_item_sk] #7 + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (6) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [web_site_id,sum,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/explain.txt new file mode 100644 index 0000000000..c412d20fc7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/explain.txt @@ -0,0 +1,275 @@ +== Physical Plan == +TakeOrderedAndProject (50) ++- * Project (49) + +- * BroadcastHashJoin Inner BuildRight (48) + :- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (18) + : : : +- * HashAggregate (17) + : : : +- Exchange (16) + : : : +- * HashAggregate (15) + : : : +- * ColumnarToRow (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (9) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * ColumnarToRow (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (23) + : : : +- CometBroadcastHashJoin (22) + : : : :- CometFilter (20) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (19) + : : : +- ReusedExchange (21) + : : +- ReusedExchange (24) + : +- BroadcastExchange (41) + : +- * ColumnarToRow (40) + : +- CometFilter (39) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (38) + +- BroadcastExchange (47) + +- * ColumnarToRow (46) + +- CometFilter (45) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (44) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Arguments: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] + +(2) CometFilter +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : (isnotnull(cr_returning_addr_sk#2) AND isnotnull(cr_returning_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5, d_year#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_year#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [cr_returned_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] +Arguments: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3], [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(10) CometFilter +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(11) CometBroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: [ca_address_sk#7, ca_state#8] + +(12) CometBroadcastHashJoin +Left output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Right output [2]: [ca_address_sk#7, ca_state#8] +Arguments: [cr_returning_addr_sk#2], [ca_address_sk#7], Inner, BuildRight + +(13) CometProject +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] +Arguments: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8], [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] + +(14) ColumnarToRow [codegen id : 1] +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] + +(15) HashAggregate [codegen id : 1] +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] + +(16) Exchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(17) HashAggregate [codegen id : 7] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [3]: [cr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] + +(18) Filter [codegen id : 7] +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(19) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17, cr_returned_date_sk#18] +Arguments: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17, cr_returned_date_sk#18] + +(20) CometFilter +Input [4]: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17, cr_returned_date_sk#18] +Condition : isnotnull(cr_returning_addr_sk#16) + +(21) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#19] + +(22) CometBroadcastHashJoin +Left output [4]: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17, cr_returned_date_sk#18] +Right output [1]: [d_date_sk#19] +Arguments: [cr_returned_date_sk#18], [d_date_sk#19], Inner, BuildRight + +(23) CometProject +Input [5]: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17, cr_returned_date_sk#18, d_date_sk#19] +Arguments: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17], [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17] + +(24) ReusedExchange [Reuses operator id: 11] +Output [2]: [ca_address_sk#20, ca_state#21] + +(25) CometBroadcastHashJoin +Left output [3]: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17] +Right output [2]: [ca_address_sk#20, ca_state#21] +Arguments: [cr_returning_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(26) CometProject +Input [5]: [cr_returning_customer_sk#15, cr_returning_addr_sk#16, cr_return_amt_inc_tax#17, ca_address_sk#20, ca_state#21] +Arguments: [cr_returning_customer_sk#15, cr_return_amt_inc_tax#17, ca_state#21], [cr_returning_customer_sk#15, cr_return_amt_inc_tax#17, ca_state#21] + +(27) ColumnarToRow [codegen id : 2] +Input [3]: [cr_returning_customer_sk#15, cr_return_amt_inc_tax#17, ca_state#21] + +(28) HashAggregate [codegen id : 2] +Input [3]: [cr_returning_customer_sk#15, cr_return_amt_inc_tax#17, ca_state#21] +Keys [2]: [cr_returning_customer_sk#15, ca_state#21] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#17))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [cr_returning_customer_sk#15, ca_state#21, sum#23] + +(29) Exchange +Input [3]: [cr_returning_customer_sk#15, ca_state#21, sum#23] +Arguments: hashpartitioning(cr_returning_customer_sk#15, ca_state#21, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(30) HashAggregate [codegen id : 3] +Input [3]: [cr_returning_customer_sk#15, ca_state#21, sum#23] +Keys [2]: [cr_returning_customer_sk#15, ca_state#21] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#17))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#17))#11] +Results [2]: [ca_state#21 AS ctr_state#24, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#17))#11,17,2) AS ctr_total_return#25] + +(31) HashAggregate [codegen id : 3] +Input [2]: [ctr_state#24, ctr_total_return#25] +Keys [1]: [ctr_state#24] +Functions [1]: [partial_avg(ctr_total_return#25)] +Aggregate Attributes [2]: [sum#26, count#27] +Results [3]: [ctr_state#24, sum#28, count#29] + +(32) Exchange +Input [3]: [ctr_state#24, sum#28, count#29] +Arguments: hashpartitioning(ctr_state#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(33) HashAggregate [codegen id : 4] +Input [3]: [ctr_state#24, sum#28, count#29] +Keys [1]: [ctr_state#24] +Functions [1]: [avg(ctr_total_return#25)] +Aggregate Attributes [1]: [avg(ctr_total_return#25)#30] +Results [2]: [(avg(ctr_total_return#25)#30 * 1.2) AS (avg(ctr_total_return) * 1.2)#31, ctr_state#24] + +(34) Filter [codegen id : 4] +Input [2]: [(avg(ctr_total_return) * 1.2)#31, ctr_state#24] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#31) + +(35) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#31, ctr_state#24] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=4] + +(36) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#24] +Join type: Inner +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#31) + +(37) Project [codegen id : 7] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#31, ctr_state#24] + +(38) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] +Arguments: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] + +(39) CometFilter +Input [6]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] +Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_current_addr_sk#34)) + +(40) ColumnarToRow [codegen id : 5] +Input [6]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] + +(41) BroadcastExchange +Input [6]: [c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(42) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#32] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 7] +Output [6]: [ctr_total_return#14, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] +Input [8]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#32, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37] + +(44) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [12]: [ca_address_sk#38, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: [ca_address_sk#38, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] + +(45) CometFilter +Input [12]: [ca_address_sk#38, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Condition : ((isnotnull(ca_state#45) AND (ca_state#45 = GA)) AND isnotnull(ca_address_sk#38)) + +(46) ColumnarToRow [codegen id : 6] +Input [12]: [ca_address_sk#38, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] + +(47) BroadcastExchange +Input [12]: [ca_address_sk#38, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(48) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#38] +Join type: Inner +Join condition: None + +(49) Project [codegen id : 7] +Output [16]: [c_customer_id#33, c_salutation#35, c_first_name#36, c_last_name#37, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#14] +Input [18]: [ctr_total_return#14, c_customer_id#33, c_current_addr_sk#34, c_salutation#35, c_first_name#36, c_last_name#37, ca_address_sk#38, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] + +(50) TakeOrderedAndProject +Input [16]: [c_customer_id#33, c_salutation#35, c_first_name#36, c_last_name#37, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#14] +Arguments: 100, [c_customer_id#33 ASC NULLS FIRST, c_salutation#35 ASC NULLS FIRST, c_first_name#36 ASC NULLS FIRST, c_last_name#37 ASC NULLS FIRST, ca_street_number#39 ASC NULLS FIRST, ca_street_name#40 ASC NULLS FIRST, ca_street_type#41 ASC NULLS FIRST, ca_suite_number#42 ASC NULLS FIRST, ca_city#43 ASC NULLS FIRST, ca_county#44 ASC NULLS FIRST, ca_state#45 ASC NULLS FIRST, ca_zip#46 ASC NULLS FIRST, ca_country#47 ASC NULLS FIRST, ca_gmt_offset#48 ASC NULLS FIRST, ca_location_type#49 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#33, c_salutation#35, c_first_name#36, c_last_name#37, ca_street_number#39, ca_street_name#40, ca_street_type#41, ca_suite_number#42, ca_city#43, ca_county#44, ca_state#45, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e5c7a02f63 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q81.native_datafusion/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + WholeStageCodegen (7) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (1) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,ca_address_sk,ca_state] + CometProject [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk,d_date_sk] + CometFilter [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [ca_address_sk,ca_state] #3 + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),sum,count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (3) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (2) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,ca_address_sk,ca_state] + CometProject [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + CometBroadcastHashJoin [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk,d_date_sk] + CometFilter [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + ReusedExchange [d_date_sk] #2 + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometFilter [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/explain.txt new file mode 100644 index 0000000000..fdcea86558 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/explain.txt @@ -0,0 +1,137 @@ +== Physical Plan == +* ColumnarToRow (26) ++- CometTakeOrderedAndProject (25) + +- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + +- CometProject (19) + +- CometFilter (18) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (17) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(2) CometFilter +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (i_current_price#4 <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(3) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(4) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(5) CometFilter +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(6) CometProject +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8], [inv_item_sk#6, inv_date_sk#8] + +(7) CometBroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [inv_item_sk#6, inv_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1], [inv_item_sk#6], Inner, BuildRight + +(9) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(11) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-05-25)) AND (d_date#10 <= 2000-07-24)) AND isnotnull(d_date_sk#9)) + +(12) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(14) CometBroadcastHashJoin +Left output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [inv_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(15) CometProject +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(16) CometBroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] + +(17) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#11, ss_sold_date_sk#12] + +(18) CometFilter +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) + +(19) CometProject +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#11], [ss_item_sk#11] + +(20) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Right output [1]: [ss_item_sk#11] +Arguments: [i_item_sk#1], [ss_item_sk#11], Inner, BuildLeft + +(21) CometProject +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] +Arguments: [i_item_id#2, i_item_desc#3, i_current_price#4], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(22) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(23) CometExchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] + +(25) CometTakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#2 ASC NULLS FIRST], output=[i_item_id#2,i_item_desc#3,i_current_price#4]), [i_item_id#2, i_item_desc#3, i_current_price#4], 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) ColumnarToRow [codegen id : 1] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b8535baaf2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q82.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometExchange [i_item_id,i_item_desc,i_current_price] #1 + CometHashAggregate [i_item_id,i_item_desc,i_current_price] + CometProject [i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,ss_item_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price] #2 + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk,d_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + CometBroadcastHashJoin [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_item_sk,inv_date_sk] + CometProject [i_item_sk,i_item_id,i_item_desc,i_current_price] + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + CometBroadcastExchange [inv_item_sk,inv_date_sk] #3 + CometProject [inv_item_sk,inv_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometProject [ss_item_sk] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/explain.txt new file mode 100644 index 0000000000..b71044ea66 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/explain.txt @@ -0,0 +1,192 @@ +== Physical Plan == +TakeOrderedAndProject (35) ++- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (31) + : +- * BroadcastHashJoin Inner BuildRight (30) + : :- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * ColumnarToRow (23) + : : +- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (17) + : : +- CometProject (16) + : : +- CometBroadcastHashJoin (15) + : : :- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + : : +- CometBroadcastExchange (14) + : : +- CometProject (13) + : : +- CometFilter (12) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- ReusedExchange (32) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Arguments: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(2) CometFilter +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(5) CometBroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: [i_item_sk#4, i_item_id#5] + +(6) CometBroadcastHashJoin +Left output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Right output [2]: [i_item_sk#4, i_item_id#5] +Arguments: [sr_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#4, i_item_id#5] +Arguments: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5], [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6, d_date#7] + +(9) CometFilter +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8, d_week_seq#9] + +(11) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date#10, d_week_seq#11] +Arguments: [d_date#10, d_week_seq#11] + +(12) CometFilter +Input [2]: [d_date#10, d_week_seq#11] +Condition : d_date#10 IN (2000-06-30,2000-09-27,2000-11-17) + +(13) CometProject +Input [2]: [d_date#10, d_week_seq#11] +Arguments: [d_week_seq#11], [d_week_seq#11] + +(14) CometBroadcastExchange +Input [1]: [d_week_seq#11] +Arguments: [d_week_seq#11] + +(15) CometBroadcastHashJoin +Left output [2]: [d_date#8, d_week_seq#9] +Right output [1]: [d_week_seq#11] +Arguments: [d_week_seq#9], [d_week_seq#11], LeftSemi, BuildRight + +(16) CometProject +Input [2]: [d_date#8, d_week_seq#9] +Arguments: [d_date#8], [d_date#8] + +(17) CometBroadcastExchange +Input [1]: [d_date#8] +Arguments: [d_date#8] + +(18) CometBroadcastHashJoin +Left output [2]: [d_date_sk#6, d_date#7] +Right output [1]: [d_date#8] +Arguments: [d_date#7], [d_date#8], LeftSemi, BuildRight + +(19) CometProject +Input [2]: [d_date_sk#6, d_date#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(21) CometBroadcastHashJoin +Left output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] +Right output [1]: [d_date_sk#6] +Arguments: [sr_returned_date_sk#3], [d_date_sk#6], Inner, BuildRight + +(22) CometProject +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5, d_date_sk#6] +Arguments: [sr_return_quantity#2, i_item_id#5], [sr_return_quantity#2, i_item_id#5] + +(23) ColumnarToRow [codegen id : 1] +Input [2]: [sr_return_quantity#2, i_item_id#5] + +(24) HashAggregate [codegen id : 1] +Input [2]: [sr_return_quantity#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#12] +Results [2]: [i_item_id#5, sum#13] + +(25) Exchange +Input [2]: [i_item_id#5, sum#13] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(26) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#5, sum#13] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#14] +Results [2]: [i_item_id#5 AS item_id#15, sum(sr_return_quantity#2)#14 AS sr_item_qty#16] + +(27) ReusedExchange [Reuses operator id: 25] +Output [2]: [i_item_id#17, sum#18] + +(28) HashAggregate [codegen id : 3] +Input [2]: [i_item_id#17, sum#18] +Keys [1]: [i_item_id#17] +Functions [1]: [sum(cr_return_quantity#19)] +Aggregate Attributes [1]: [sum(cr_return_quantity#19)#20] +Results [2]: [i_item_id#17 AS item_id#21, sum(cr_return_quantity#19)#20 AS cr_item_qty#22] + +(29) BroadcastExchange +Input [2]: [item_id#21, cr_item_qty#22] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#21] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [3]: [item_id#15, sr_item_qty#16, cr_item_qty#22] +Input [4]: [item_id#15, sr_item_qty#16, item_id#21, cr_item_qty#22] + +(32) ReusedExchange [Reuses operator id: 29] +Output [2]: [item_id#23, wr_item_qty#24] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#23] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [8]: [item_id#15, sr_item_qty#16, (((cast(sr_item_qty#16 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#16 + cr_item_qty#22) + wr_item_qty#24) as double)))) / 3.0) * 100.0) AS sr_dev#25, cr_item_qty#22, (((cast(cr_item_qty#22 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#16 + cr_item_qty#22) + wr_item_qty#24) as double)))) / 3.0) * 100.0) AS cr_dev#26, wr_item_qty#24, (((cast(wr_item_qty#24 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(((sr_item_qty#16 + cr_item_qty#22) + wr_item_qty#24) as double)))) / 3.0) * 100.0) AS wr_dev#27, (cast(((sr_item_qty#16 + cr_item_qty#22) + wr_item_qty#24) as decimal(20,0)) / 3.0) AS average#28] +Input [5]: [item_id#15, sr_item_qty#16, cr_item_qty#22, item_id#23, wr_item_qty#24] + +(35) TakeOrderedAndProject +Input [8]: [item_id#15, sr_item_qty#16, sr_dev#25, cr_item_qty#22, cr_dev#26, wr_item_qty#24, wr_dev#27, average#28] +Arguments: 100, [item_id#15 ASC NULLS FIRST, sr_item_qty#16 ASC NULLS FIRST], [item_id#15, sr_item_qty#16, sr_dev#25, cr_item_qty#22, cr_dev#26, wr_item_qty#24, wr_dev#27, average#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/simplified.txt new file mode 100644 index 0000000000..81c23c07c4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q83.ansi.native_datafusion/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (6) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (1) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [sr_return_quantity,i_item_id] + CometBroadcastHashJoin [sr_return_quantity,sr_returned_date_sk,i_item_id,d_date_sk] + CometProject [sr_return_quantity,sr_returned_date_sk,i_item_id] + CometBroadcastHashJoin [sr_item_sk,sr_return_quantity,sr_returned_date_sk,i_item_sk,i_item_id] + CometFilter [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id] #2 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometBroadcastHashJoin [d_date_sk,d_date,d_date] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [d_date] #4 + CometProject [d_date] + CometBroadcastHashJoin [d_date,d_week_seq,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date,d_week_seq] + CometBroadcastExchange [d_week_seq] #5 + CometProject [d_week_seq] + CometFilter [d_date,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date,d_week_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + ReusedExchange [i_item_id,sum] #1 + InputAdapter + ReusedExchange [item_id,wr_item_qty] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/explain.txt new file mode 100644 index 0000000000..37a2d3f738 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/explain.txt @@ -0,0 +1,167 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * ColumnarToRow (30) + +- CometBroadcastHashJoin (29) + :- CometBroadcastExchange (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (9) + : : +- CometBroadcastExchange (16) + : : +- CometFilter (15) + : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (14) + : +- CometBroadcastExchange (22) + : +- CometProject (21) + : +- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`income_band` (19) + +- CometProject (28) + +- CometFilter (27) + +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Arguments: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(2) CometFilter +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#7, ca_city#8] +Arguments: [ca_address_sk#7, ca_city#8] + +(4) CometFilter +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(5) CometProject +Input [2]: [ca_address_sk#7, ca_city#8] +Arguments: [ca_address_sk#7], [ca_address_sk#7] + +(6) CometBroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: [ca_address_sk#7] + +(7) CometBroadcastHashJoin +Left output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Right output [1]: [ca_address_sk#7] +Arguments: [c_current_addr_sk#4], [ca_address_sk#7], Inner, BuildRight + +(8) CometProject +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] +Arguments: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6], [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(10) CometFilter +Input [1]: [cd_demo_sk#9] +Condition : isnotnull(cd_demo_sk#9) + +(11) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(12) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Right output [1]: [cd_demo_sk#9] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(13) CometProject +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9], [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(14) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [hd_demo_sk#10, hd_income_band_sk#11] + +(15) CometFilter +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) + +(16) CometBroadcastExchange +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [hd_demo_sk#10, hd_income_band_sk#11] + +(17) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Right output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [c_current_hdemo_sk#3], [hd_demo_sk#10], Inner, BuildRight + +(18) CometProject +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_demo_sk#10, hd_income_band_sk#11] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11], [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] + +(19) CometNativeScan: `spark_catalog`.`default`.`income_band` +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Arguments: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] + +(20) CometFilter +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) + +(21) CometProject +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Arguments: [ib_income_band_sk#12], [ib_income_band_sk#12] + +(22) CometBroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: [ib_income_band_sk#12] + +(23) CometBroadcastHashJoin +Left output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] +Right output [1]: [ib_income_band_sk#12] +Arguments: [hd_income_band_sk#11], [ib_income_band_sk#12], Inner, BuildRight + +(24) CometProject +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11, ib_income_band_sk#12] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9], [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(25) CometBroadcastExchange +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Arguments: [sr_cdemo_sk#15, sr_returned_date_sk#16] + +(27) CometFilter +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) + +(28) CometProject +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Arguments: [sr_cdemo_sk#15], [sr_cdemo_sk#15] + +(29) CometBroadcastHashJoin +Left output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Right output [1]: [sr_cdemo_sk#15] +Arguments: [cd_demo_sk#9], [sr_cdemo_sk#15], Inner, BuildLeft + +(30) ColumnarToRow [codegen id : 1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(31) Project [codegen id : 1] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(32) TakeOrderedAndProject +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6fd5331780 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q84.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (1) + Project [c_customer_id,c_last_name,c_first_name] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,cd_demo_sk,sr_cdemo_sk] + CometBroadcastExchange [c_customer_id,c_first_name,c_last_name,cd_demo_sk] #1 + CometProject [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk,ib_income_band_sk] + CometProject [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] + CometBroadcastHashJoin [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk,hd_demo_sk,hd_income_band_sk] + CometProject [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + CometBroadcastHashJoin [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + CometProject [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] + CometBroadcastHashJoin [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name,ca_address_sk] + CometFilter [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + CometBroadcastExchange [ca_address_sk] #2 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_city] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_city] + CometBroadcastExchange [cd_demo_sk] #3 + CometFilter [cd_demo_sk] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #4 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_income_band_sk] + CometBroadcastExchange [ib_income_band_sk] #5 + CometProject [ib_income_band_sk] + CometFilter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + CometNativeScan: `spark_catalog`.`default`.`income_band` [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + CometProject [sr_cdemo_sk] + CometFilter [sr_cdemo_sk,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_cdemo_sk,sr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/explain.txt new file mode 100644 index 0000000000..8f7309cd1c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/explain.txt @@ -0,0 +1,240 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- * ColumnarToRow (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (29) + : : +- CometBroadcastHashJoin (28) + : : :- CometProject (23) + : : : +- CometBroadcastHashJoin (22) + : : : :- CometProject (18) + : : : : +- CometBroadcastHashJoin (17) + : : : : :- CometProject (13) + : : : : : +- CometBroadcastHashJoin (12) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometBroadcastExchange (3) + : : : : : : : +- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : : : +- CometProject (6) + : : : : : : +- CometFilter (5) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (4) + : : : : : +- CometBroadcastExchange (11) + : : : : : +- CometFilter (10) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_page` (9) + : : : : +- CometBroadcastExchange (16) + : : : : +- CometFilter (15) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (14) + : : : +- CometBroadcastExchange (21) + : : : +- CometFilter (20) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (19) + : : +- CometBroadcastExchange (27) + : : +- CometProject (26) + : : +- CometFilter (25) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + : +- CometBroadcastExchange (33) + : +- CometProject (32) + : +- CometFilter (31) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (30) + +- CometBroadcastExchange (38) + +- CometFilter (37) + +- CometNativeScan: `spark_catalog`.`default`.`reason` (36) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((((isnotnull(ws_item_sk#1) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_web_page_sk#2)) AND ((((ws_sales_price#5 >= 100.00) AND (ws_sales_price#5 <= 150.00)) OR ((ws_sales_price#5 >= 50.00) AND (ws_sales_price#5 <= 100.00))) OR ((ws_sales_price#5 >= 150.00) AND (ws_sales_price#5 <= 200.00)))) AND ((((ws_net_profit#6 >= 100.00) AND (ws_net_profit#6 <= 200.00)) OR ((ws_net_profit#6 >= 150.00) AND (ws_net_profit#6 <= 300.00))) OR ((ws_net_profit#6 >= 50.00) AND (ws_net_profit#6 <= 250.00)))) + +(3) CometBroadcastExchange +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Arguments: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] + +(5) CometFilter +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(6) CometProject +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Arguments: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15], [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(7) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Right output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: [ws_item_sk#1, ws_order_number#3], [wr_item_sk#8, wr_order_number#13], Inner, BuildLeft + +(8) CometProject +Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(9) CometNativeScan: `spark_catalog`.`default`.`web_page` +Output [1]: [wp_web_page_sk#17] +Arguments: [wp_web_page_sk#17] + +(10) CometFilter +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(11) CometBroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: [wp_web_page_sk#17] + +(12) CometBroadcastHashJoin +Left output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [1]: [wp_web_page_sk#17] +Arguments: [ws_web_page_sk#2], [wp_web_page_sk#17], Inner, BuildRight + +(13) CometProject +Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] +Arguments: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(14) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(15) CometFilter +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : (((isnotnull(cd_demo_sk#18) AND isnotnull(cd_marital_status#19)) AND isnotnull(cd_education_status#20)) AND ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) OR ((cd_marital_status#19 = S) AND (cd_education_status#20 = College ))) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )))) + +(16) CometBroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(17) CometBroadcastHashJoin +Left output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [wr_refunded_cdemo_sk#9], [cd_demo_sk#18], Inner, ((((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#19 = S) AND (cd_education_status#20 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))), BuildRight + +(18) CometProject +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20], [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] + +(19) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(20) CometFilter +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Condition : ((isnotnull(cd_demo_sk#21) AND isnotnull(cd_marital_status#22)) AND isnotnull(cd_education_status#23)) + +(21) CometBroadcastExchange +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(22) CometBroadcastHashJoin +Left output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] +Right output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [wr_returning_cdemo_sk#11, cd_marital_status#19, cd_education_status#20], [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23], Inner, BuildRight + +(23) CometProject +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20, cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Arguments: [ca_address_sk#24, ca_state#25, ca_country#26] + +(25) CometFilter +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Condition : (((isnotnull(ca_country#26) AND (ca_country#26 = United States)) AND isnotnull(ca_address_sk#24)) AND ((ca_state#25 IN (IN,OH,NJ) OR ca_state#25 IN (WI,CT,KY)) OR ca_state#25 IN (LA,IA,AR))) + +(26) CometProject +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Arguments: [ca_address_sk#24, ca_state#25], [ca_address_sk#24, ca_state#25] + +(27) CometBroadcastExchange +Input [2]: [ca_address_sk#24, ca_state#25] +Arguments: [ca_address_sk#24, ca_state#25] + +(28) CometBroadcastHashJoin +Left output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [2]: [ca_address_sk#24, ca_state#25] +Arguments: [wr_refunded_addr_sk#10], [ca_address_sk#24], Inner, ((((ca_state#25 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#25 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#25 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))), BuildRight + +(29) CometProject +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#24, ca_state#25] +Arguments: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(30) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#27, d_year#28] +Arguments: [d_date_sk#27, d_year#28] + +(31) CometFilter +Input [2]: [d_date_sk#27, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(32) CometProject +Input [2]: [d_date_sk#27, d_year#28] +Arguments: [d_date_sk#27], [d_date_sk#27] + +(33) CometBroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: [d_date_sk#27] + +(34) CometBroadcastHashJoin +Left output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [1]: [d_date_sk#27] +Arguments: [ws_sold_date_sk#7], [d_date_sk#27], Inner, BuildRight + +(35) CometProject +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#27] +Arguments: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15], [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] + +(36) CometNativeScan: `spark_catalog`.`default`.`reason` +Output [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [r_reason_sk#29, r_reason_desc#30] + +(37) CometFilter +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) + +(38) CometBroadcastExchange +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [r_reason_sk#29, r_reason_desc#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Right output [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: [wr_reason_sk#12], [r_reason_sk#29], Inner, BuildRight + +(40) CometProject +Input [6]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#29, r_reason_desc#30] +Arguments: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30], [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] + +(41) ColumnarToRow [codegen id : 1] +Input [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] + +(42) HashAggregate [codegen id : 1] +Input [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [6]: [sum#31, count#32, sum#33, count#34, sum#35, count#36] +Results [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(43) Exchange +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(44) HashAggregate [codegen id : 2] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [3]: [avg(ws_quantity#4)#43, avg(UnscaledValue(wr_refunded_cash#15))#44, avg(UnscaledValue(wr_fee#14))#45] +Results [4]: [substr(r_reason_desc#30, 1, 20) AS substr(r_reason_desc, 1, 20)#46, avg(ws_quantity#4)#43 AS avg(ws_quantity)#47, cast((avg(UnscaledValue(wr_refunded_cash#15))#44 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#48, cast((avg(UnscaledValue(wr_fee#14))#45 / 100.0) as decimal(11,6)) AS avg(wr_fee)#49] + +(45) TakeOrderedAndProject +Input [4]: [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] +Arguments: 100, [substr(r_reason_desc, 1, 20)#46 ASC NULLS FIRST, avg(ws_quantity)#47 ASC NULLS FIRST, avg(wr_refunded_cash)#48 ASC NULLS FIRST, avg(wr_fee)#49 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d970a3cc38 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q85.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] + WholeStageCodegen (2) + HashAggregate [r_reason_desc,sum,count,sum,count,sum,count] [avg(ws_quantity),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),sum,count,sum,count,sum,count] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen (1) + HashAggregate [r_reason_desc,ws_quantity,wr_refunded_cash,wr_fee] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + CometBroadcastHashJoin [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash,r_reason_sk,r_reason_desc] + CometProject [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash,d_date_sk] + CometProject [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash,ca_address_sk,ca_state] + CometProject [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,wp_web_page_sk] + CometProject [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + CometBroadcastHashJoin [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + CometBroadcastExchange [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] #2 + CometFilter [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + CometFilter [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + CometBroadcastExchange [wp_web_page_sk] #3 + CometFilter [wp_web_page_sk] + CometNativeScan: `spark_catalog`.`default`.`web_page` [wp_web_page_sk] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #4 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #5 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [ca_address_sk,ca_state] #6 + CometProject [ca_address_sk,ca_state] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #7 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [r_reason_sk,r_reason_desc] #8 + CometFilter [r_reason_sk,r_reason_desc] + CometNativeScan: `spark_catalog`.`default`.`reason` [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/explain.txt new file mode 100644 index 0000000000..6612b949e8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (23) ++- * Project (22) + +- Window (21) + +- * Sort (20) + +- Exchange (19) + +- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * ColumnarToRow (15) + +- CometExpand (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (8) + : +- CometBroadcastHashJoin (7) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (6) + : +- CometProject (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + +- CometBroadcastExchange (11) + +- CometFilter (10) + +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] +Arguments: [ws_item_sk#1, ws_net_paid#2], [ws_item_sk#1, ws_net_paid#2] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(10) CometFilter +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#1, ws_net_paid#2] +Right output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_net_paid#2, i_category#8, i_class#7], [ws_net_paid#2, i_category#8, i_class#7] + +(14) CometExpand +Input [3]: [ws_net_paid#2, i_category#8, i_class#7] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] + +(15) ColumnarToRow [codegen id : 1] +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] + +(16) HashAggregate [codegen id : 1] +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum#12] +Results [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] + +(17) Exchange +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(18) HashAggregate [codegen id : 2] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#14] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS total_sum#15, i_category#9, i_class#10, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS lochierarchy#16, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS _w0#17, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS _w1#18, CASE WHEN (cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint) = 0) THEN i_category#9 END AS _w2#19] + +(19) Exchange +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: hashpartitioning(_w1#18, _w2#19, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(20) Sort [codegen id : 3] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: [_w1#18 ASC NULLS FIRST, _w2#19 ASC NULLS FIRST, _w0#17 DESC NULLS LAST], false, 0 + +(21) Window +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: [rank(_w0#17) windowspecdefinition(_w1#18, _w2#19, _w0#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#20], [_w1#18, _w2#19], [_w0#17 DESC NULLS LAST] + +(22) Project [codegen id : 4] +Output [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Input [8]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19, rank_within_parent#20] + +(23) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Arguments: 100, [lochierarchy#16 DESC NULLS LAST, CASE WHEN (lochierarchy#16 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#20 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/simplified.txt new file mode 100644 index 0000000000..aa40577d65 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q86.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (4) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (3) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (2) + HashAggregate [i_category,i_class,spark_grouping_id,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (1) + HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] + ColumnarToRow + InputAdapter + CometExpand [i_category,i_class] [ws_net_paid,i_category,i_class,spark_grouping_id] + CometProject [ws_net_paid,i_category,i_class] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,i_item_sk,i_class,i_category] + CometProject [ws_item_sk,ws_net_paid] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_class,i_category] #4 + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/explain.txt new file mode 100644 index 0000000000..3ff75a4a3b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/explain.txt @@ -0,0 +1,154 @@ +== Physical Plan == +* HashAggregate (28) ++- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin LeftAnti BuildRight (24) + :- * BroadcastHashJoin LeftAnti BuildRight (22) + : :- * ColumnarToRow (17) + : : +- CometHashAggregate (16) + : : +- CometExchange (15) + : : +- CometHashAggregate (14) + : : +- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : +- BroadcastExchange (21) + : +- * ColumnarToRow (20) + : +- CometHashAggregate (19) + : +- ReusedExchange (18) + +- ReusedExchange (23) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Arguments: [ss_customer_sk#1, ss_sold_date_sk#2] + +(2) CometFilter +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4, d_month_seq#5] + +(4) CometFilter +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(5) CometProject +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Arguments: [d_date_sk#3, d_date#4], [d_date_sk#3, d_date#4] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: [d_date_sk#3, d_date#4] + +(7) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Right output [2]: [d_date_sk#3, d_date#4] +Arguments: [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] +Arguments: [ss_customer_sk#1, d_date#4], [ss_customer_sk#1, d_date#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(10) CometFilter +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(11) CometBroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#1, d_date#4] +Right output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [ss_customer_sk#1], [c_customer_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_last_name#8, c_first_name#7, d_date#4], [c_last_name#8, c_first_name#7, d_date#4] + +(14) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(15) CometExchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) ReusedExchange [Reuses operator id: 15] +Output [3]: [c_last_name#9, c_first_name#10, d_date#11] + +(19) CometHashAggregate +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Keys [3]: [c_last_name#9, c_first_name#10, d_date#11] +Functions: [] + +(20) ColumnarToRow [codegen id : 1] +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] + +(21) BroadcastExchange +Input [3]: [c_last_name#9, c_first_name#10, d_date#11] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=2] + +(22) BroadcastHashJoin [codegen id : 3] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#10, ), isnull(c_first_name#10), coalesce(d_date#11, 1970-01-01), isnull(d_date#11)] +Join type: LeftAnti +Join condition: None + +(23) ReusedExchange [Reuses operator id: 21] +Output [3]: [c_last_name#12, c_first_name#13, d_date#14] + +(24) BroadcastHashJoin [codegen id : 3] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#12, ), isnull(c_last_name#12), coalesce(c_first_name#13, ), isnull(c_first_name#13), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)] +Join type: LeftAnti +Join condition: None + +(25) Project [codegen id : 3] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(26) HashAggregate [codegen id : 3] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [1]: [count#16] + +(27) Exchange +Input [1]: [count#16] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(28) HashAggregate [codegen id : 4] +Input [1]: [count#16] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [1]: [count(1)#17 AS count(1)#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8612445652 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q87.native_datafusion/simplified.txt @@ -0,0 +1,36 @@ +WholeStageCodegen (4) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (3) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + CometExchange [c_last_name,c_first_name,d_date] #2 + CometHashAggregate [c_last_name,c_first_name,d_date] + CometProject [c_last_name,c_first_name,d_date] + CometBroadcastHashJoin [ss_customer_sk,d_date,c_customer_sk,c_first_name,c_last_name] + CometProject [ss_customer_sk,d_date] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_customer_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #3 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + CometBroadcastExchange [c_customer_sk,c_first_name,c_last_name] #4 + CometFilter [c_customer_sk,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [c_last_name,c_first_name,d_date] + ReusedExchange [c_last_name,c_first_name,d_date] #2 + InputAdapter + ReusedExchange [c_last_name,c_first_name,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/explain.txt new file mode 100644 index 0000000000..1917778c34 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/explain.txt @@ -0,0 +1,873 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (172) +:- * BroadcastNestedLoopJoin Inner BuildRight (151) +: :- * BroadcastNestedLoopJoin Inner BuildRight (130) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (109) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (88) +: : : : :- * BroadcastNestedLoopJoin Inner BuildRight (67) +: : : : : :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : : : : : :- * ColumnarToRow (25) +: : : : : : : +- CometHashAggregate (24) +: : : : : : : +- CometExchange (23) +: : : : : : : +- CometHashAggregate (22) +: : : : : : : +- CometProject (21) +: : : : : : : +- CometBroadcastHashJoin (20) +: : : : : : : :- CometProject (15) +: : : : : : : : +- CometBroadcastHashJoin (14) +: : : : : : : : :- CometProject (9) +: : : : : : : : : +- CometBroadcastHashJoin (8) +: : : : : : : : : :- CometProject (3) +: : : : : : : : : : +- CometFilter (2) +: : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) +: : : : : : : : : +- CometBroadcastExchange (7) +: : : : : : : : : +- CometProject (6) +: : : : : : : : : +- CometFilter (5) +: : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (4) +: : : : : : : : +- CometBroadcastExchange (13) +: : : : : : : : +- CometProject (12) +: : : : : : : : +- CometFilter (11) +: : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (10) +: : : : : : : +- CometBroadcastExchange (19) +: : : : : : : +- CometProject (18) +: : : : : : : +- CometFilter (17) +: : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (16) +: : : : : : +- BroadcastExchange (45) +: : : : : : +- * ColumnarToRow (44) +: : : : : : +- CometHashAggregate (43) +: : : : : : +- CometExchange (42) +: : : : : : +- CometHashAggregate (41) +: : : : : : +- CometProject (40) +: : : : : : +- CometBroadcastHashJoin (39) +: : : : : : :- CometProject (37) +: : : : : : : +- CometBroadcastHashJoin (36) +: : : : : : : :- CometProject (31) +: : : : : : : : +- CometBroadcastHashJoin (30) +: : : : : : : : :- CometProject (28) +: : : : : : : : : +- CometFilter (27) +: : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (26) +: : : : : : : : +- ReusedExchange (29) +: : : : : : : +- CometBroadcastExchange (35) +: : : : : : : +- CometProject (34) +: : : : : : : +- CometFilter (33) +: : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (32) +: : : : : : +- ReusedExchange (38) +: : : : : +- BroadcastExchange (66) +: : : : : +- * ColumnarToRow (65) +: : : : : +- CometHashAggregate (64) +: : : : : +- CometExchange (63) +: : : : : +- CometHashAggregate (62) +: : : : : +- CometProject (61) +: : : : : +- CometBroadcastHashJoin (60) +: : : : : :- CometProject (58) +: : : : : : +- CometBroadcastHashJoin (57) +: : : : : : :- CometProject (52) +: : : : : : : +- CometBroadcastHashJoin (51) +: : : : : : : :- CometProject (49) +: : : : : : : : +- CometFilter (48) +: : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (47) +: : : : : : : +- ReusedExchange (50) +: : : : : : +- CometBroadcastExchange (56) +: : : : : : +- CometProject (55) +: : : : : : +- CometFilter (54) +: : : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (53) +: : : : : +- ReusedExchange (59) +: : : : +- BroadcastExchange (87) +: : : : +- * ColumnarToRow (86) +: : : : +- CometHashAggregate (85) +: : : : +- CometExchange (84) +: : : : +- CometHashAggregate (83) +: : : : +- CometProject (82) +: : : : +- CometBroadcastHashJoin (81) +: : : : :- CometProject (79) +: : : : : +- CometBroadcastHashJoin (78) +: : : : : :- CometProject (73) +: : : : : : +- CometBroadcastHashJoin (72) +: : : : : : :- CometProject (70) +: : : : : : : +- CometFilter (69) +: : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (68) +: : : : : : +- ReusedExchange (71) +: : : : : +- CometBroadcastExchange (77) +: : : : : +- CometProject (76) +: : : : : +- CometFilter (75) +: : : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (74) +: : : : +- ReusedExchange (80) +: : : +- BroadcastExchange (108) +: : : +- * ColumnarToRow (107) +: : : +- CometHashAggregate (106) +: : : +- CometExchange (105) +: : : +- CometHashAggregate (104) +: : : +- CometProject (103) +: : : +- CometBroadcastHashJoin (102) +: : : :- CometProject (100) +: : : : +- CometBroadcastHashJoin (99) +: : : : :- CometProject (94) +: : : : : +- CometBroadcastHashJoin (93) +: : : : : :- CometProject (91) +: : : : : : +- CometFilter (90) +: : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (89) +: : : : : +- ReusedExchange (92) +: : : : +- CometBroadcastExchange (98) +: : : : +- CometProject (97) +: : : : +- CometFilter (96) +: : : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (95) +: : : +- ReusedExchange (101) +: : +- BroadcastExchange (129) +: : +- * ColumnarToRow (128) +: : +- CometHashAggregate (127) +: : +- CometExchange (126) +: : +- CometHashAggregate (125) +: : +- CometProject (124) +: : +- CometBroadcastHashJoin (123) +: : :- CometProject (121) +: : : +- CometBroadcastHashJoin (120) +: : : :- CometProject (115) +: : : : +- CometBroadcastHashJoin (114) +: : : : :- CometProject (112) +: : : : : +- CometFilter (111) +: : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (110) +: : : : +- ReusedExchange (113) +: : : +- CometBroadcastExchange (119) +: : : +- CometProject (118) +: : : +- CometFilter (117) +: : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (116) +: : +- ReusedExchange (122) +: +- BroadcastExchange (150) +: +- * ColumnarToRow (149) +: +- CometHashAggregate (148) +: +- CometExchange (147) +: +- CometHashAggregate (146) +: +- CometProject (145) +: +- CometBroadcastHashJoin (144) +: :- CometProject (142) +: : +- CometBroadcastHashJoin (141) +: : :- CometProject (136) +: : : +- CometBroadcastHashJoin (135) +: : : :- CometProject (133) +: : : : +- CometFilter (132) +: : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (131) +: : : +- ReusedExchange (134) +: : +- CometBroadcastExchange (140) +: : +- CometProject (139) +: : +- CometFilter (138) +: : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (137) +: +- ReusedExchange (143) ++- BroadcastExchange (171) + +- * ColumnarToRow (170) + +- CometHashAggregate (169) + +- CometExchange (168) + +- CometHashAggregate (167) + +- CometProject (166) + +- CometBroadcastHashJoin (165) + :- CometProject (163) + : +- CometBroadcastHashJoin (162) + : :- CometProject (157) + : : +- CometBroadcastHashJoin (156) + : : :- CometProject (154) + : : : +- CometFilter (153) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (152) + : : +- ReusedExchange (155) + : +- CometBroadcastExchange (161) + : +- CometProject (160) + : +- CometFilter (159) + : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (158) + +- ReusedExchange (164) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(3) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3], [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Arguments: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] + +(5) CometFilter +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Condition : (((((hd_dep_count#6 = 4) AND (hd_vehicle_count#7 <= 6)) OR ((hd_dep_count#6 = 2) AND (hd_vehicle_count#7 <= 4))) OR ((hd_dep_count#6 = 0) AND (hd_vehicle_count#7 <= 2))) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] +Arguments: [ss_sold_time_sk#1, ss_store_sk#3], [ss_sold_time_sk#1, ss_store_sk#3] + +(10) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Arguments: [t_time_sk#8, t_hour#9, t_minute#10] + +(11) CometFilter +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(12) CometProject +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Arguments: [t_time_sk#8], [t_time_sk#8] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: [t_time_sk#8] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Right output [1]: [t_time_sk#8] +Arguments: [ss_sold_time_sk#1], [t_time_sk#8], Inner, BuildRight + +(15) CometProject +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] +Arguments: [ss_store_sk#3], [ss_store_sk#3] + +(16) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_store_name#12] +Arguments: [s_store_sk#11, s_store_name#12] + +(17) CometFilter +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(18) CometProject +Input [2]: [s_store_sk#11, s_store_name#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(19) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(20) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#3] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#3], [s_store_sk#11], Inner, BuildRight + +(21) CometProject +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#13] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 8] +Input [1]: [h8_30_to_9#14] + +(26) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Arguments: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] + +(27) CometFilter +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Condition : ((isnotnull(ss_hdemo_sk#16) AND isnotnull(ss_sold_time_sk#15)) AND isnotnull(ss_store_sk#17)) + +(28) CometProject +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, ss_sold_date_sk#18] +Arguments: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17], [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17] + +(29) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#19] + +(30) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17] +Right output [1]: [hd_demo_sk#19] +Arguments: [ss_hdemo_sk#16], [hd_demo_sk#19], Inner, BuildRight + +(31) CometProject +Input [4]: [ss_sold_time_sk#15, ss_hdemo_sk#16, ss_store_sk#17, hd_demo_sk#19] +Arguments: [ss_sold_time_sk#15, ss_store_sk#17], [ss_sold_time_sk#15, ss_store_sk#17] + +(32) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Arguments: [t_time_sk#20, t_hour#21, t_minute#22] + +(33) CometFilter +Input [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Condition : ((((isnotnull(t_hour#21) AND isnotnull(t_minute#22)) AND (t_hour#21 = 9)) AND (t_minute#22 < 30)) AND isnotnull(t_time_sk#20)) + +(34) CometProject +Input [3]: [t_time_sk#20, t_hour#21, t_minute#22] +Arguments: [t_time_sk#20], [t_time_sk#20] + +(35) CometBroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: [t_time_sk#20] + +(36) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#15, ss_store_sk#17] +Right output [1]: [t_time_sk#20] +Arguments: [ss_sold_time_sk#15], [t_time_sk#20], Inner, BuildRight + +(37) CometProject +Input [3]: [ss_sold_time_sk#15, ss_store_sk#17, t_time_sk#20] +Arguments: [ss_store_sk#17], [ss_store_sk#17] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#23] + +(39) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#17] +Right output [1]: [s_store_sk#23] +Arguments: [ss_store_sk#17], [s_store_sk#23], Inner, BuildRight + +(40) CometProject +Input [2]: [ss_store_sk#17, s_store_sk#23] + +(41) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(42) CometExchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(43) CometHashAggregate +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [h9_to_9_30#25] + +(45) BroadcastExchange +Input [1]: [h9_to_9_30#25] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(46) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(47) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Arguments: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] + +(48) CometFilter +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Condition : ((isnotnull(ss_hdemo_sk#27) AND isnotnull(ss_sold_time_sk#26)) AND isnotnull(ss_store_sk#28)) + +(49) CometProject +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, ss_sold_date_sk#29] +Arguments: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28], [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28] + +(50) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#30] + +(51) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28] +Right output [1]: [hd_demo_sk#30] +Arguments: [ss_hdemo_sk#27], [hd_demo_sk#30], Inner, BuildRight + +(52) CometProject +Input [4]: [ss_sold_time_sk#26, ss_hdemo_sk#27, ss_store_sk#28, hd_demo_sk#30] +Arguments: [ss_sold_time_sk#26, ss_store_sk#28], [ss_sold_time_sk#26, ss_store_sk#28] + +(53) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31, t_hour#32, t_minute#33] + +(54) CometFilter +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Condition : ((((isnotnull(t_hour#32) AND isnotnull(t_minute#33)) AND (t_hour#32 = 9)) AND (t_minute#33 >= 30)) AND isnotnull(t_time_sk#31)) + +(55) CometProject +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: [t_time_sk#31], [t_time_sk#31] + +(56) CometBroadcastExchange +Input [1]: [t_time_sk#31] +Arguments: [t_time_sk#31] + +(57) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#26, ss_store_sk#28] +Right output [1]: [t_time_sk#31] +Arguments: [ss_sold_time_sk#26], [t_time_sk#31], Inner, BuildRight + +(58) CometProject +Input [3]: [ss_sold_time_sk#26, ss_store_sk#28, t_time_sk#31] +Arguments: [ss_store_sk#28], [ss_store_sk#28] + +(59) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#34] + +(60) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#28] +Right output [1]: [s_store_sk#34] +Arguments: [ss_store_sk#28], [s_store_sk#34], Inner, BuildRight + +(61) CometProject +Input [2]: [ss_store_sk#28, s_store_sk#34] + +(62) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(63) CometExchange +Input [1]: [count#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(64) CometHashAggregate +Input [1]: [count#35] +Keys: [] +Functions [1]: [count(1)] + +(65) ColumnarToRow [codegen id : 2] +Input [1]: [h9_30_to_10#36] + +(66) BroadcastExchange +Input [1]: [h9_30_to_10#36] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(67) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(68) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Arguments: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] + +(69) CometFilter +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Condition : ((isnotnull(ss_hdemo_sk#38) AND isnotnull(ss_sold_time_sk#37)) AND isnotnull(ss_store_sk#39)) + +(70) CometProject +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Arguments: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39], [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] + +(71) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#41] + +(72) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] +Right output [1]: [hd_demo_sk#41] +Arguments: [ss_hdemo_sk#38], [hd_demo_sk#41], Inner, BuildRight + +(73) CometProject +Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, hd_demo_sk#41] +Arguments: [ss_sold_time_sk#37, ss_store_sk#39], [ss_sold_time_sk#37, ss_store_sk#39] + +(74) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Arguments: [t_time_sk#42, t_hour#43, t_minute#44] + +(75) CometFilter +Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Condition : ((((isnotnull(t_hour#43) AND isnotnull(t_minute#44)) AND (t_hour#43 = 10)) AND (t_minute#44 < 30)) AND isnotnull(t_time_sk#42)) + +(76) CometProject +Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Arguments: [t_time_sk#42], [t_time_sk#42] + +(77) CometBroadcastExchange +Input [1]: [t_time_sk#42] +Arguments: [t_time_sk#42] + +(78) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#37, ss_store_sk#39] +Right output [1]: [t_time_sk#42] +Arguments: [ss_sold_time_sk#37], [t_time_sk#42], Inner, BuildRight + +(79) CometProject +Input [3]: [ss_sold_time_sk#37, ss_store_sk#39, t_time_sk#42] +Arguments: [ss_store_sk#39], [ss_store_sk#39] + +(80) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#45] + +(81) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#39] +Right output [1]: [s_store_sk#45] +Arguments: [ss_store_sk#39], [s_store_sk#45], Inner, BuildRight + +(82) CometProject +Input [2]: [ss_store_sk#39, s_store_sk#45] + +(83) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(84) CometExchange +Input [1]: [count#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(85) CometHashAggregate +Input [1]: [count#46] +Keys: [] +Functions [1]: [count(1)] + +(86) ColumnarToRow [codegen id : 3] +Input [1]: [h10_to_10_30#47] + +(87) BroadcastExchange +Input [1]: [h10_to_10_30#47] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(88) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(89) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Arguments: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] + +(90) CometFilter +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Condition : ((isnotnull(ss_hdemo_sk#49) AND isnotnull(ss_sold_time_sk#48)) AND isnotnull(ss_store_sk#50)) + +(91) CometProject +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, ss_sold_date_sk#51] +Arguments: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50], [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50] + +(92) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#52] + +(93) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50] +Right output [1]: [hd_demo_sk#52] +Arguments: [ss_hdemo_sk#49], [hd_demo_sk#52], Inner, BuildRight + +(94) CometProject +Input [4]: [ss_sold_time_sk#48, ss_hdemo_sk#49, ss_store_sk#50, hd_demo_sk#52] +Arguments: [ss_sold_time_sk#48, ss_store_sk#50], [ss_sold_time_sk#48, ss_store_sk#50] + +(95) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Arguments: [t_time_sk#53, t_hour#54, t_minute#55] + +(96) CometFilter +Input [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Condition : ((((isnotnull(t_hour#54) AND isnotnull(t_minute#55)) AND (t_hour#54 = 10)) AND (t_minute#55 >= 30)) AND isnotnull(t_time_sk#53)) + +(97) CometProject +Input [3]: [t_time_sk#53, t_hour#54, t_minute#55] +Arguments: [t_time_sk#53], [t_time_sk#53] + +(98) CometBroadcastExchange +Input [1]: [t_time_sk#53] +Arguments: [t_time_sk#53] + +(99) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#48, ss_store_sk#50] +Right output [1]: [t_time_sk#53] +Arguments: [ss_sold_time_sk#48], [t_time_sk#53], Inner, BuildRight + +(100) CometProject +Input [3]: [ss_sold_time_sk#48, ss_store_sk#50, t_time_sk#53] +Arguments: [ss_store_sk#50], [ss_store_sk#50] + +(101) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#56] + +(102) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#50] +Right output [1]: [s_store_sk#56] +Arguments: [ss_store_sk#50], [s_store_sk#56], Inner, BuildRight + +(103) CometProject +Input [2]: [ss_store_sk#50, s_store_sk#56] + +(104) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(105) CometExchange +Input [1]: [count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(106) CometHashAggregate +Input [1]: [count#57] +Keys: [] +Functions [1]: [count(1)] + +(107) ColumnarToRow [codegen id : 4] +Input [1]: [h10_30_to_11#58] + +(108) BroadcastExchange +Input [1]: [h10_30_to_11#58] +Arguments: IdentityBroadcastMode, [plan_id=9] + +(109) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(110) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Arguments: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] + +(111) CometFilter +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_hdemo_sk#60) AND isnotnull(ss_sold_time_sk#59)) AND isnotnull(ss_store_sk#61)) + +(112) CometProject +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, ss_sold_date_sk#62] +Arguments: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61], [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61] + +(113) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#63] + +(114) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61] +Right output [1]: [hd_demo_sk#63] +Arguments: [ss_hdemo_sk#60], [hd_demo_sk#63], Inner, BuildRight + +(115) CometProject +Input [4]: [ss_sold_time_sk#59, ss_hdemo_sk#60, ss_store_sk#61, hd_demo_sk#63] +Arguments: [ss_sold_time_sk#59, ss_store_sk#61], [ss_sold_time_sk#59, ss_store_sk#61] + +(116) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Arguments: [t_time_sk#64, t_hour#65, t_minute#66] + +(117) CometFilter +Input [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Condition : ((((isnotnull(t_hour#65) AND isnotnull(t_minute#66)) AND (t_hour#65 = 11)) AND (t_minute#66 < 30)) AND isnotnull(t_time_sk#64)) + +(118) CometProject +Input [3]: [t_time_sk#64, t_hour#65, t_minute#66] +Arguments: [t_time_sk#64], [t_time_sk#64] + +(119) CometBroadcastExchange +Input [1]: [t_time_sk#64] +Arguments: [t_time_sk#64] + +(120) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#59, ss_store_sk#61] +Right output [1]: [t_time_sk#64] +Arguments: [ss_sold_time_sk#59], [t_time_sk#64], Inner, BuildRight + +(121) CometProject +Input [3]: [ss_sold_time_sk#59, ss_store_sk#61, t_time_sk#64] +Arguments: [ss_store_sk#61], [ss_store_sk#61] + +(122) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#67] + +(123) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#61] +Right output [1]: [s_store_sk#67] +Arguments: [ss_store_sk#61], [s_store_sk#67], Inner, BuildRight + +(124) CometProject +Input [2]: [ss_store_sk#61, s_store_sk#67] + +(125) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(126) CometExchange +Input [1]: [count#68] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=10] + +(127) CometHashAggregate +Input [1]: [count#68] +Keys: [] +Functions [1]: [count(1)] + +(128) ColumnarToRow [codegen id : 5] +Input [1]: [h11_to_11_30#69] + +(129) BroadcastExchange +Input [1]: [h11_to_11_30#69] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(130) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(131) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Arguments: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] + +(132) CometFilter +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Condition : ((isnotnull(ss_hdemo_sk#71) AND isnotnull(ss_sold_time_sk#70)) AND isnotnull(ss_store_sk#72)) + +(133) CometProject +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, ss_sold_date_sk#73] +Arguments: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72], [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72] + +(134) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#74] + +(135) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72] +Right output [1]: [hd_demo_sk#74] +Arguments: [ss_hdemo_sk#71], [hd_demo_sk#74], Inner, BuildRight + +(136) CometProject +Input [4]: [ss_sold_time_sk#70, ss_hdemo_sk#71, ss_store_sk#72, hd_demo_sk#74] +Arguments: [ss_sold_time_sk#70, ss_store_sk#72], [ss_sold_time_sk#70, ss_store_sk#72] + +(137) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Arguments: [t_time_sk#75, t_hour#76, t_minute#77] + +(138) CometFilter +Input [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Condition : ((((isnotnull(t_hour#76) AND isnotnull(t_minute#77)) AND (t_hour#76 = 11)) AND (t_minute#77 >= 30)) AND isnotnull(t_time_sk#75)) + +(139) CometProject +Input [3]: [t_time_sk#75, t_hour#76, t_minute#77] +Arguments: [t_time_sk#75], [t_time_sk#75] + +(140) CometBroadcastExchange +Input [1]: [t_time_sk#75] +Arguments: [t_time_sk#75] + +(141) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#70, ss_store_sk#72] +Right output [1]: [t_time_sk#75] +Arguments: [ss_sold_time_sk#70], [t_time_sk#75], Inner, BuildRight + +(142) CometProject +Input [3]: [ss_sold_time_sk#70, ss_store_sk#72, t_time_sk#75] +Arguments: [ss_store_sk#72], [ss_store_sk#72] + +(143) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#78] + +(144) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#72] +Right output [1]: [s_store_sk#78] +Arguments: [ss_store_sk#72], [s_store_sk#78], Inner, BuildRight + +(145) CometProject +Input [2]: [ss_store_sk#72, s_store_sk#78] + +(146) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(147) CometExchange +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(148) CometHashAggregate +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] + +(149) ColumnarToRow [codegen id : 6] +Input [1]: [h11_30_to_12#80] + +(150) BroadcastExchange +Input [1]: [h11_30_to_12#80] +Arguments: IdentityBroadcastMode, [plan_id=13] + +(151) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + +(152) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Arguments: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] + +(153) CometFilter +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Condition : ((isnotnull(ss_hdemo_sk#82) AND isnotnull(ss_sold_time_sk#81)) AND isnotnull(ss_store_sk#83)) + +(154) CometProject +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, ss_sold_date_sk#84] +Arguments: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83], [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83] + +(155) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#85] + +(156) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83] +Right output [1]: [hd_demo_sk#85] +Arguments: [ss_hdemo_sk#82], [hd_demo_sk#85], Inner, BuildRight + +(157) CometProject +Input [4]: [ss_sold_time_sk#81, ss_hdemo_sk#82, ss_store_sk#83, hd_demo_sk#85] +Arguments: [ss_sold_time_sk#81, ss_store_sk#83], [ss_sold_time_sk#81, ss_store_sk#83] + +(158) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Arguments: [t_time_sk#86, t_hour#87, t_minute#88] + +(159) CometFilter +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Condition : ((((isnotnull(t_hour#87) AND isnotnull(t_minute#88)) AND (t_hour#87 = 12)) AND (t_minute#88 < 30)) AND isnotnull(t_time_sk#86)) + +(160) CometProject +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Arguments: [t_time_sk#86], [t_time_sk#86] + +(161) CometBroadcastExchange +Input [1]: [t_time_sk#86] +Arguments: [t_time_sk#86] + +(162) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#81, ss_store_sk#83] +Right output [1]: [t_time_sk#86] +Arguments: [ss_sold_time_sk#81], [t_time_sk#86], Inner, BuildRight + +(163) CometProject +Input [3]: [ss_sold_time_sk#81, ss_store_sk#83, t_time_sk#86] +Arguments: [ss_store_sk#83], [ss_store_sk#83] + +(164) ReusedExchange [Reuses operator id: 19] +Output [1]: [s_store_sk#89] + +(165) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#83] +Right output [1]: [s_store_sk#89] +Arguments: [ss_store_sk#83], [s_store_sk#89], Inner, BuildRight + +(166) CometProject +Input [2]: [ss_store_sk#83, s_store_sk#89] + +(167) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(168) CometExchange +Input [1]: [count#90] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=14] + +(169) CometHashAggregate +Input [1]: [count#90] +Keys: [] +Functions [1]: [count(1)] + +(170) ColumnarToRow [codegen id : 7] +Input [1]: [h12_to_12_30#91] + +(171) BroadcastExchange +Input [1]: [h12_to_12_30#91] +Arguments: IdentityBroadcastMode, [plan_id=15] + +(172) BroadcastNestedLoopJoin [codegen id : 8] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5acfeba131 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q88.native_datafusion/simplified.txt @@ -0,0 +1,195 @@ +WholeStageCodegen (8) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [h8_30_to_9,count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [h9_to_9_30,count,count(1)] + CometExchange #6 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #7 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [h9_30_to_10,count,count(1)] + CometExchange #9 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #10 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometHashAggregate [h10_to_10_30,count,count(1)] + CometExchange #12 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #13 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometHashAggregate [h10_30_to_11,count,count(1)] + CometExchange #15 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #16 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometHashAggregate [h11_to_11_30,count,count(1)] + CometExchange #18 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #19 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (6) + ColumnarToRow + InputAdapter + CometHashAggregate [h11_30_to_12,count,count(1)] + CometExchange #21 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #22 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometHashAggregate [h12_to_12_30,count,count(1)] + CometExchange #24 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #25 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + ReusedExchange [s_store_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/explain.txt new file mode 100644 index 0000000000..75ee453e4b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/explain.txt @@ -0,0 +1,151 @@ +== Physical Plan == +TakeOrderedAndProject (28) ++- * Project (27) + +- * Filter (26) + +- Window (25) + +- * Sort (24) + +- Exchange (23) + +- * HashAggregate (22) + +- Exchange (21) + +- * HashAggregate (20) + +- * ColumnarToRow (19) + +- CometProject (18) + +- CometBroadcastHashJoin (17) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (7) + : : +- CometBroadcastHashJoin (6) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : +- CometBroadcastExchange (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : +- CometBroadcastExchange (11) + : +- CometProject (10) + : +- CometFilter (9) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + +- CometBroadcastExchange (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Arguments: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(2) CometFilter +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books ,Electronics ,Sports ) AND i_class#3 IN (computers ,stereo ,football )) OR (i_category#4 IN (Men ,Jewelry ,Women ) AND i_class#3 IN (shirts ,birdal ,dresses ))) AND isnotnull(i_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(4) CometFilter +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(6) CometBroadcastHashJoin +Left output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Right output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [i_item_sk#1], [ss_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8], [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_year#10, d_moy#11] + +(9) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((isnotnull(d_year#10) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(10) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_moy#11], [d_date_sk#9, d_moy#11] + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#9, d_moy#11] +Arguments: [d_date_sk#9, d_moy#11] + +(12) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Right output [2]: [d_date_sk#9, d_moy#11] +Arguments: [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(13) CometProject +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#9, d_moy#11] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11], [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(15) CometFilter +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) + +(16) CometBroadcastExchange +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(17) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Right output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [ss_store_sk#6], [s_store_sk#12], Inner, BuildRight + +(18) CometProject +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14], [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] + +(20) HashAggregate [codegen id : 1] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] + +(21) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(22) HashAggregate [codegen id : 2] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#17] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS _w0#19] + +(23) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(24) Sort [codegen id : 3] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST], false, 0 + +(25) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] + +(26) Filter [codegen id : 4] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN ((abs((sum_sales#18 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END + +(27) Project [codegen id : 4] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] + +(28) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [(sum_sales#18 - avg_monthly_sales#20) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/simplified.txt new file mode 100644 index 0000000000..cf2bafd365 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q89.native_datafusion/simplified.txt @@ -0,0 +1,36 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] + WholeStageCodegen (4) + Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (3) + Sort [i_category,i_brand,s_store_name,s_company_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (2) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 + WholeStageCodegen (1) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,ss_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] + CometBroadcastHashJoin [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_moy] + CometProject [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_class,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_moy] #4 + CometProject [d_date_sk,d_moy] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/explain.txt new file mode 100644 index 0000000000..e17d267111 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/explain.txt @@ -0,0 +1,285 @@ +== Physical Plan == +* ColumnarToRow (4) ++- CometProject (3) + +- CometFilter (2) + +- CometNativeScan: `spark_catalog`.`default`.`reason` (1) + + +(1) CometNativeScan: `spark_catalog`.`default`.`reason` +Output [1]: [r_reason_sk#1] +Arguments: [r_reason_sk#1] + +(2) CometFilter +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(3) CometProject +Input [1]: [r_reason_sk#1] +Arguments: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6], [CASE WHEN (Subquery scalar-subquery#7, [id=#8].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_net_paid) END AS bucket1#2, CASE WHEN (Subquery scalar-subquery#9, [id=#10].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_net_paid) END AS bucket2#3, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket3#4, CASE WHEN (Subquery scalar-subquery#13, [id=#14].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#13, [id=#14].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#13, [id=#14].avg(ss_net_paid) END AS bucket4#5, CASE WHEN (Subquery scalar-subquery#15, [id=#16].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#15, [id=#16].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#15, [id=#16].avg(ss_net_paid) END AS bucket5#6] + +(4) ColumnarToRow [codegen id : 1] +Input [5]: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +* Project (12) ++- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * ColumnarToRow (8) + +- CometProject (7) + +- CometFilter (6) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Arguments: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] + +(6) CometFilter +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) + +(7) CometProject +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Arguments: [ss_ext_discount_amt#18, ss_net_paid#19], [ss_ext_discount_amt#18, ss_net_paid#19] + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] + +(9) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [5]: [count#21, sum#22, count#23, sum#24, count#25] +Results [5]: [count#26, sum#27, count#28, sum#29, count#30] + +(10) Exchange +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] + +(11) HashAggregate [codegen id : 2] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [3]: [count(1)#31, avg(UnscaledValue(ss_ext_discount_amt#18))#32, avg(UnscaledValue(ss_net_paid#19))#33] +Results [3]: [count(1)#31 AS count(1)#34, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#32 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#35, cast((avg(UnscaledValue(ss_net_paid#19))#33 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#36] + +(12) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#34, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#35, avg(ss_net_paid), avg(ss_net_paid)#36) AS mergedValue#37] +Input [3]: [count(1)#34, avg(ss_ext_discount_amt)#35, avg(ss_net_paid)#36] + +Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:3 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] + +Subquery:4 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* Project (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * ColumnarToRow (16) + +- CometProject (15) + +- CometFilter (14) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (13) + + +(13) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Arguments: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] + +(14) CometFilter +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Condition : ((isnotnull(ss_quantity#38) AND (ss_quantity#38 >= 21)) AND (ss_quantity#38 <= 40)) + +(15) CometProject +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Arguments: [ss_ext_discount_amt#39, ss_net_paid#40], [ss_ext_discount_amt#39, ss_net_paid#40] + +(16) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_discount_amt#39, ss_net_paid#40] + +(17) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#39)), partial_avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [5]: [count#42, sum#43, count#44, sum#45, count#46] +Results [5]: [count#47, sum#48, count#49, sum#50, count#51] + +(18) Exchange +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(19) HashAggregate [codegen id : 2] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#39)), avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [3]: [count(1)#52, avg(UnscaledValue(ss_ext_discount_amt#39))#53, avg(UnscaledValue(ss_net_paid#40))#54] +Results [3]: [count(1)#52 AS count(1)#55, cast((avg(UnscaledValue(ss_ext_discount_amt#39))#53 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#56, cast((avg(UnscaledValue(ss_net_paid#40))#54 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#57] + +(20) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#55, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#56, avg(ss_net_paid), avg(ss_net_paid)#57) AS mergedValue#58] +Input [3]: [count(1)#55, avg(ss_ext_discount_amt)#56, avg(ss_net_paid)#57] + +Subquery:5 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + +Subquery:6 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] + +Subquery:7 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* Project (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * ColumnarToRow (24) + +- CometProject (23) + +- CometFilter (22) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (21) + + +(21) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Arguments: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] + +(22) CometFilter +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_quantity#59) AND (ss_quantity#59 >= 41)) AND (ss_quantity#59 <= 60)) + +(23) CometProject +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Arguments: [ss_ext_discount_amt#60, ss_net_paid#61], [ss_ext_discount_amt#60, ss_net_paid#61] + +(24) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_discount_amt#60, ss_net_paid#61] + +(25) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#60)), partial_avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [5]: [count#63, sum#64, count#65, sum#66, count#67] +Results [5]: [count#68, sum#69, count#70, sum#71, count#72] + +(26) Exchange +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(27) HashAggregate [codegen id : 2] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#60)), avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [3]: [count(1)#73, avg(UnscaledValue(ss_ext_discount_amt#60))#74, avg(UnscaledValue(ss_net_paid#61))#75] +Results [3]: [count(1)#73 AS count(1)#76, cast((avg(UnscaledValue(ss_ext_discount_amt#60))#74 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#77, cast((avg(UnscaledValue(ss_net_paid#61))#75 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#78] + +(28) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#76, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#77, avg(ss_net_paid), avg(ss_net_paid)#78) AS mergedValue#79] +Input [3]: [count(1)#76, avg(ss_ext_discount_amt)#77, avg(ss_net_paid)#78] + +Subquery:8 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:9 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:10 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* Project (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * ColumnarToRow (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (29) + + +(29) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Arguments: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] + +(30) CometFilter +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Condition : ((isnotnull(ss_quantity#80) AND (ss_quantity#80 >= 61)) AND (ss_quantity#80 <= 80)) + +(31) CometProject +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Arguments: [ss_ext_discount_amt#81, ss_net_paid#82], [ss_ext_discount_amt#81, ss_net_paid#82] + +(32) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_discount_amt#81, ss_net_paid#82] + +(33) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#81)), partial_avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [5]: [count#84, sum#85, count#86, sum#87, count#88] +Results [5]: [count#89, sum#90, count#91, sum#92, count#93] + +(34) Exchange +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(35) HashAggregate [codegen id : 2] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#81)), avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [3]: [count(1)#94, avg(UnscaledValue(ss_ext_discount_amt#81))#95, avg(UnscaledValue(ss_net_paid#82))#96] +Results [3]: [count(1)#94 AS count(1)#97, cast((avg(UnscaledValue(ss_ext_discount_amt#81))#95 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#98, cast((avg(UnscaledValue(ss_net_paid#82))#96 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#99] + +(36) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#97, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#98, avg(ss_net_paid), avg(ss_net_paid)#99) AS mergedValue#100] +Input [3]: [count(1)#97, avg(ss_ext_discount_amt)#98, avg(ss_net_paid)#99] + +Subquery:11 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + +Subquery:12 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + +Subquery:13 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#15, [id=#16] +* Project (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * ColumnarToRow (40) + +- CometProject (39) + +- CometFilter (38) + +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (37) + + +(37) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Arguments: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] + +(38) CometFilter +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Condition : ((isnotnull(ss_quantity#101) AND (ss_quantity#101 >= 81)) AND (ss_quantity#101 <= 100)) + +(39) CometProject +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Arguments: [ss_ext_discount_amt#102, ss_net_paid#103], [ss_ext_discount_amt#102, ss_net_paid#103] + +(40) ColumnarToRow [codegen id : 1] +Input [2]: [ss_ext_discount_amt#102, ss_net_paid#103] + +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#102)), partial_avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [5]: [count#105, sum#106, count#107, sum#108, count#109] +Results [5]: [count#110, sum#111, count#112, sum#113, count#114] + +(42) Exchange +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(43) HashAggregate [codegen id : 2] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#102)), avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [3]: [count(1)#115, avg(UnscaledValue(ss_ext_discount_amt#102))#116, avg(UnscaledValue(ss_net_paid#103))#117] +Results [3]: [count(1)#115 AS count(1)#118, cast((avg(UnscaledValue(ss_ext_discount_amt#102))#116 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#119, cast((avg(UnscaledValue(ss_net_paid#103))#117 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#120] + +(44) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#118, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#119, avg(ss_net_paid), avg(ss_net_paid)#120) AS mergedValue#121] +Input [3]: [count(1)#118, avg(ss_ext_discount_amt)#119, avg(ss_net_paid)#120] + +Subquery:14 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] + +Subquery:15 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/simplified.txt new file mode 100644 index 0000000000..919249c87f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9.native_datafusion/simplified.txt @@ -0,0 +1,81 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [bucket1,bucket2,bucket3,bucket4,bucket5] + Subquery #1 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + CometFilter [r_reason_sk] + CometNativeScan: `spark_catalog`.`default`.`reason` [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/explain.txt new file mode 100644 index 0000000000..6390d2d5eb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/explain.txt @@ -0,0 +1,242 @@ +== Physical Plan == +* Project (47) ++- * BroadcastNestedLoopJoin Inner BuildRight (46) + :- * ColumnarToRow (25) + : +- CometHashAggregate (24) + : +- CometExchange (23) + : +- CometHashAggregate (22) + : +- CometProject (21) + : +- CometBroadcastHashJoin (20) + : :- CometProject (15) + : : +- CometBroadcastHashJoin (14) + : : :- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometProject (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (4) + : : +- CometBroadcastExchange (13) + : : +- CometProject (12) + : : +- CometFilter (11) + : : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (10) + : +- CometBroadcastExchange (19) + : +- CometProject (18) + : +- CometFilter (17) + : +- CometNativeScan: `spark_catalog`.`default`.`web_page` (16) + +- BroadcastExchange (45) + +- * ColumnarToRow (44) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometProject (40) + +- CometBroadcastHashJoin (39) + :- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometProject (31) + : : +- CometBroadcastHashJoin (30) + : : :- CometProject (28) + : : : +- CometFilter (27) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (26) + : : +- ReusedExchange (29) + : +- CometBroadcastExchange (35) + : +- CometProject (34) + : +- CometFilter (33) + : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (32) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Arguments: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(3) CometProject +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Arguments: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3], [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5, hd_dep_count#6] + +(5) CometFilter +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 6)) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ws_ship_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#5] +Arguments: [ws_sold_time_sk#1, ws_web_page_sk#3], [ws_sold_time_sk#1, ws_web_page_sk#3] + +(10) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [2]: [t_time_sk#7, t_hour#8] +Arguments: [t_time_sk#7, t_hour#8] + +(11) CometFilter +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(12) CometProject +Input [2]: [t_time_sk#7, t_hour#8] +Arguments: [t_time_sk#7], [t_time_sk#7] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: [t_time_sk#7] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Right output [1]: [t_time_sk#7] +Arguments: [ws_sold_time_sk#1], [t_time_sk#7], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] +Arguments: [ws_web_page_sk#3], [ws_web_page_sk#3] + +(16) CometNativeScan: `spark_catalog`.`default`.`web_page` +Output [2]: [wp_web_page_sk#9, wp_char_count#10] +Arguments: [wp_web_page_sk#9, wp_char_count#10] + +(17) CometFilter +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Condition : (((isnotnull(wp_char_count#10) AND (wp_char_count#10 >= 5000)) AND (wp_char_count#10 <= 5200)) AND isnotnull(wp_web_page_sk#9)) + +(18) CometProject +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Arguments: [wp_web_page_sk#9], [wp_web_page_sk#9] + +(19) CometBroadcastExchange +Input [1]: [wp_web_page_sk#9] +Arguments: [wp_web_page_sk#9] + +(20) CometBroadcastHashJoin +Left output [1]: [ws_web_page_sk#3] +Right output [1]: [wp_web_page_sk#9] +Arguments: [ws_web_page_sk#3], [wp_web_page_sk#9], Inner, BuildRight + +(21) CometProject +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#9] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#11] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 2] +Input [1]: [amc#12] + +(26) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Arguments: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] + +(27) CometFilter +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Condition : ((isnotnull(ws_ship_hdemo_sk#14) AND isnotnull(ws_sold_time_sk#13)) AND isnotnull(ws_web_page_sk#15)) + +(28) CometProject +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, ws_sold_date_sk#16] +Arguments: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15], [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15] + +(29) ReusedExchange [Reuses operator id: 7] +Output [1]: [hd_demo_sk#17] + +(30) CometBroadcastHashJoin +Left output [3]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15] +Right output [1]: [hd_demo_sk#17] +Arguments: [ws_ship_hdemo_sk#14], [hd_demo_sk#17], Inner, BuildRight + +(31) CometProject +Input [4]: [ws_sold_time_sk#13, ws_ship_hdemo_sk#14, ws_web_page_sk#15, hd_demo_sk#17] +Arguments: [ws_sold_time_sk#13, ws_web_page_sk#15], [ws_sold_time_sk#13, ws_web_page_sk#15] + +(32) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [2]: [t_time_sk#18, t_hour#19] +Arguments: [t_time_sk#18, t_hour#19] + +(33) CometFilter +Input [2]: [t_time_sk#18, t_hour#19] +Condition : (((isnotnull(t_hour#19) AND (t_hour#19 >= 19)) AND (t_hour#19 <= 20)) AND isnotnull(t_time_sk#18)) + +(34) CometProject +Input [2]: [t_time_sk#18, t_hour#19] +Arguments: [t_time_sk#18], [t_time_sk#18] + +(35) CometBroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: [t_time_sk#18] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_sold_time_sk#13, ws_web_page_sk#15] +Right output [1]: [t_time_sk#18] +Arguments: [ws_sold_time_sk#13], [t_time_sk#18], Inner, BuildRight + +(37) CometProject +Input [3]: [ws_sold_time_sk#13, ws_web_page_sk#15, t_time_sk#18] +Arguments: [ws_web_page_sk#15], [ws_web_page_sk#15] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [wp_web_page_sk#20] + +(39) CometBroadcastHashJoin +Left output [1]: [ws_web_page_sk#15] +Right output [1]: [wp_web_page_sk#20] +Arguments: [ws_web_page_sk#15], [wp_web_page_sk#20], Inner, BuildRight + +(40) CometProject +Input [2]: [ws_web_page_sk#15, wp_web_page_sk#20] + +(41) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(42) CometExchange +Input [1]: [count#21] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(43) CometHashAggregate +Input [1]: [count#21] +Keys: [] +Functions [1]: [count(1)] + +(44) ColumnarToRow [codegen id : 1] +Input [1]: [pmc#22] + +(45) BroadcastExchange +Input [1]: [pmc#22] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(46) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 2] +Output [1]: [(cast(amc#12 as decimal(15,4)) / cast(pmc#22 as decimal(15,4))) AS am_pm_ratio#23] +Input [2]: [amc#12, pmc#22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a746bdf6f4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q90.native_datafusion/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen (2) + Project [amc,pmc] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometHashAggregate [amc,count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + CometProject [ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_web_page_sk,t_time_sk] + CometProject [ws_sold_time_sk,ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,hd_demo_sk] + CometProject [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + CometFilter [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour] + CometBroadcastExchange [wp_web_page_sk] #4 + CometProject [wp_web_page_sk] + CometFilter [wp_web_page_sk,wp_char_count] + CometNativeScan: `spark_catalog`.`default`.`web_page` [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [pmc,count,count(1)] + CometExchange #6 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + CometProject [ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_web_page_sk,t_time_sk] + CometProject [ws_sold_time_sk,ws_web_page_sk] + CometBroadcastHashJoin [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,hd_demo_sk] + CometProject [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + CometFilter [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + ReusedExchange [hd_demo_sk] #2 + CometBroadcastExchange [t_time_sk] #7 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour] + ReusedExchange [wp_web_page_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/explain.txt new file mode 100644 index 0000000000..ecd5609ab4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/explain.txt @@ -0,0 +1,223 @@ +== Physical Plan == +* ColumnarToRow (42) ++- CometSort (41) + +- CometColumnarExchange (40) + +- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * ColumnarToRow (36) + +- CometProject (35) + +- CometBroadcastHashJoin (34) + :- CometProject (29) + : +- CometBroadcastHashJoin (28) + : :- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (13) + : : : : +- CometBroadcastHashJoin (12) + : : : : :- CometProject (7) + : : : : : +- CometBroadcastHashJoin (6) + : : : : : :- CometFilter (2) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`call_center` (1) + : : : : : +- CometBroadcastExchange (5) + : : : : : +- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (3) + : : : : +- CometBroadcastExchange (11) + : : : : +- CometProject (10) + : : : : +- CometFilter (9) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : : +- CometBroadcastExchange (16) + : : : +- CometFilter (15) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (14) + : : +- CometBroadcastExchange (22) + : : +- CometProject (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (19) + : +- CometBroadcastExchange (27) + : +- CometFilter (26) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (25) + +- CometBroadcastExchange (33) + +- CometProject (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Arguments: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] + +(2) CometFilter +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(4) CometFilter +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Condition : (isnotnull(cr_call_center_sk#6) AND isnotnull(cr_returning_customer_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(6) CometBroadcastHashJoin +Left output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Right output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cc_call_center_sk#1], [cr_call_center_sk#6], Inner, BuildRight + +(7) CometProject +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9, d_year#10, d_moy#11] + +(9) CometFilter +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 1998)) AND (d_moy#11 = 11)) AND isnotnull(d_date_sk#9)) + +(10) CometProject +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(12) CometBroadcastHashJoin +Left output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [cr_returned_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(13) CometProject +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#9] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] + +(14) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(15) CometFilter +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Condition : (((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#15)) AND isnotnull(c_current_cdemo_sk#13)) AND isnotnull(c_current_hdemo_sk#14)) + +(16) CometBroadcastExchange +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(17) CometBroadcastHashJoin +Left output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] +Right output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [cr_returning_customer_sk#5], [c_customer_sk#12], Inner, BuildRight + +(18) CometProject +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(19) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#16, ca_gmt_offset#17] +Arguments: [ca_address_sk#16, ca_gmt_offset#17] + +(20) CometFilter +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Condition : ((isnotnull(ca_gmt_offset#17) AND (ca_gmt_offset#17 = -7.00)) AND isnotnull(ca_address_sk#16)) + +(21) CometProject +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(22) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(23) CometBroadcastHashJoin +Left output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Right output [1]: [ca_address_sk#16] +Arguments: [c_current_addr_sk#15], [ca_address_sk#16], Inner, BuildRight + +(24) CometProject +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15, ca_address_sk#16] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] + +(25) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(26) CometFilter +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Unknown )) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = Advanced Degree ))) AND isnotnull(cd_demo_sk#18)) + +(27) CometBroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(28) CometBroadcastHashJoin +Left output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] +Right output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [c_current_cdemo_sk#13], [cd_demo_sk#18], Inner, BuildRight + +(29) CometProject +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] + +(30) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#21, hd_buy_potential#22] +Arguments: [hd_demo_sk#21, hd_buy_potential#22] + +(31) CometFilter +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Condition : ((isnotnull(hd_buy_potential#22) AND StartsWith(hd_buy_potential#22, Unknown)) AND isnotnull(hd_demo_sk#21)) + +(32) CometProject +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Arguments: [hd_demo_sk#21], [hd_demo_sk#21] + +(33) CometBroadcastExchange +Input [1]: [hd_demo_sk#21] +Arguments: [hd_demo_sk#21] + +(34) CometBroadcastHashJoin +Left output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] +Right output [1]: [hd_demo_sk#21] +Arguments: [c_current_hdemo_sk#14], [hd_demo_sk#21], Inner, BuildRight + +(35) CometProject +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20, hd_demo_sk#21] +Arguments: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20], [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] + +(36) ColumnarToRow [codegen id : 1] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] + +(37) HashAggregate [codegen id : 1] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#7))] +Aggregate Attributes [1]: [sum#23] +Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] + +(38) Exchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(39) HashAggregate [codegen id : 2] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [sum(UnscaledValue(cr_net_loss#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#7))#25] +Results [4]: [cc_call_center_id#2 AS Call_Center#26, cc_name#3 AS Call_Center_Name#27, cc_manager#4 AS Manager#28, MakeDecimal(sum(UnscaledValue(cr_net_loss#7))#25,17,2) AS Returns_Loss#29] + +(40) CometColumnarExchange +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] +Arguments: rangepartitioning(Returns_Loss#29 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(41) CometSort +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] +Arguments: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29], [Returns_Loss#29 DESC NULLS LAST] + +(42) ColumnarToRow [codegen id : 3] +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/simplified.txt new file mode 100644 index 0000000000..652ca90404 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q91.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [Call_Center,Call_Center_Name,Manager,Returns_Loss] + CometColumnarExchange [Returns_Loss] #1 + WholeStageCodegen (2) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum] [sum(UnscaledValue(cr_net_loss)),Call_Center,Call_Center_Name,Manager,Returns_Loss,sum] + InputAdapter + Exchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 + WholeStageCodegen (1) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status,hd_demo_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,ca_address_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] + CometBroadcastHashJoin [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometProject [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk] + CometBroadcastHashJoin [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometFilter [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + CometBroadcastExchange [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] #3 + CometFilter [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] #5 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_gmt_offset] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_gmt_offset] + CometBroadcastExchange [cd_demo_sk,cd_marital_status,cd_education_status] #7 + CometFilter [cd_demo_sk,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status,cd_education_status] + CometBroadcastExchange [hd_demo_sk] #8 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/explain.txt new file mode 100644 index 0000000000..cdb99b45b3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +* HashAggregate (35) ++- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * ColumnarToRow (9) + : : +- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : +- BroadcastExchange (23) + : +- * Filter (22) + : +- * HashAggregate (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- * ColumnarToRow (18) + : +- CometProject (17) + : +- CometBroadcastHashJoin (16) + : :- CometFilter (11) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (10) + : +- CometBroadcastExchange (15) + : +- CometProject (14) + : +- CometFilter (13) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (12) + +- BroadcastExchange (30) + +- * ColumnarToRow (29) + +- CometProject (28) + +- CometFilter (27) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (26) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Condition : (isnotnull(ws_item_sk#1) AND isnotnull(ws_ext_discount_amt#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4, i_manufact_id#5] + +(4) CometFilter +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(5) CometProject +Input [2]: [i_item_sk#4, i_manufact_id#5] +Arguments: [i_item_sk#4], [i_item_sk#4] + +(6) CometBroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: [i_item_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Right output [1]: [i_item_sk#4] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Arguments: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4], [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] + +(9) ColumnarToRow [codegen id : 4] +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] + +(10) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Arguments: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] + +(11) CometFilter +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Condition : isnotnull(ws_item_sk#6) + +(12) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9, d_date#10] + +(13) CometFilter +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(14) CometProject +Input [2]: [d_date_sk#9, d_date#10] +Arguments: [d_date_sk#9], [d_date_sk#9] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: [d_date_sk#9] + +(16) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Right output [1]: [d_date_sk#9] +Arguments: [ws_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + +(17) CometProject +Input [4]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8, d_date_sk#9] +Arguments: [ws_item_sk#6, ws_ext_discount_amt#7], [ws_item_sk#6, ws_ext_discount_amt#7] + +(18) ColumnarToRow [codegen id : 1] +Input [2]: [ws_item_sk#6, ws_ext_discount_amt#7] + +(19) HashAggregate [codegen id : 1] +Input [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Keys [1]: [ws_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#7))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [ws_item_sk#6, sum#13, count#14] + +(20) Exchange +Input [3]: [ws_item_sk#6, sum#13, count#14] +Arguments: hashpartitioning(ws_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(21) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#6, sum#13, count#14] +Keys [1]: [ws_item_sk#6] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))#15] +Results [2]: [(1.3 * cast((avg(UnscaledValue(ws_ext_discount_amt#7))#15 / 100.0) as decimal(11,6))) AS (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] + +(22) Filter [codegen id : 2] +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#16) + +(23) BroadcastExchange +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=2] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [ws_item_sk#6] +Join type: Inner +Join condition: (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#16) + +(25) Project [codegen id : 4] +Output [2]: [ws_ext_discount_amt#2, ws_sold_date_sk#3] +Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4, (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] + +(26) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#17, d_date#18] +Arguments: [d_date_sk#17, d_date#18] + +(27) CometFilter +Input [2]: [d_date_sk#17, d_date#18] +Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 2000-01-27)) AND (d_date#18 <= 2000-04-26)) AND isnotnull(d_date_sk#17)) + +(28) CometProject +Input [2]: [d_date_sk#17, d_date#18] +Arguments: [d_date_sk#17], [d_date_sk#17] + +(29) ColumnarToRow [codegen id : 3] +Input [1]: [d_date_sk#17] + +(30) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(31) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#17] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 4] +Output [1]: [ws_ext_discount_amt#2] +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#17] + +(33) HashAggregate [codegen id : 4] +Input [1]: [ws_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#19] +Results [1]: [sum#20] + +(34) Exchange +Input [1]: [sum#20] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(35) HashAggregate [codegen id : 5] +Input [1]: [sum#20] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))#21] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#2))#21,17,2) AS Excess Discount Amount #22] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6ba2c8ff43 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q92.native_datafusion/simplified.txt @@ -0,0 +1,47 @@ +WholeStageCodegen (5) + HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [ws_ext_discount_amt] [sum,sum] + Project [ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(1.3 * avg(ws_ext_discount_amt))] + ColumnarToRow + InputAdapter + CometProject [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + CometBroadcastHashJoin [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + CometFilter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk] #2 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [(1.3 * avg(ws_ext_discount_amt))] + HashAggregate [ws_item_sk,sum,count] [avg(UnscaledValue(ws_ext_discount_amt)),(1.3 * avg(ws_ext_discount_amt)),sum,count] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (1) + HashAggregate [ws_item_sk,ws_ext_discount_amt] [sum,count,sum,count] + ColumnarToRow + InputAdapter + CometProject [ws_item_sk,ws_ext_discount_amt] + CometBroadcastHashJoin [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/explain.txt new file mode 100644 index 0000000000..58d4ecf05c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/explain.txt @@ -0,0 +1,120 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * HashAggregate (21) + +- Exchange (20) + +- * HashAggregate (19) + +- * ColumnarToRow (18) + +- CometProject (17) + +- CometBroadcastHashJoin (16) + :- CometProject (11) + : +- CometSortMergeJoin (10) + : :- CometSort (4) + : : +- CometExchange (3) + : : +- CometProject (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometSort (9) + : +- CometExchange (8) + : +- CometProject (7) + : +- CometFilter (6) + : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + +- CometBroadcastExchange (15) + +- CometProject (14) + +- CometFilter (13) + +- CometNativeScan: `spark_catalog`.`default`.`reason` (12) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] + +(2) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5], [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] + +(3) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#3 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) CometFilter +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_item_sk#7) AND isnotnull(sr_ticket_number#9)) AND isnotnull(sr_reason_sk#8)) + +(7) CometProject +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10], [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] + +(8) CometExchange +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: hashpartitioning(sr_item_sk#7, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10], [sr_item_sk#7 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Right output [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [ss_item_sk#1, ss_ticket_number#3], [sr_item_sk#7, sr_ticket_number#9], Inner + +(11) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10], [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] + +(12) CometNativeScan: `spark_catalog`.`default`.`reason` +Output [2]: [r_reason_sk#12, r_reason_desc#13] +Arguments: [r_reason_sk#12, r_reason_desc#13] + +(13) CometFilter +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Condition : ((isnotnull(r_reason_desc#13) AND (r_reason_desc#13 = reason 28 )) AND isnotnull(r_reason_sk#12)) + +(14) CometProject +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Arguments: [r_reason_sk#12], [r_reason_sk#12] + +(15) CometBroadcastExchange +Input [1]: [r_reason_sk#12] +Arguments: [r_reason_sk#12] + +(16) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] +Right output [1]: [r_reason_sk#12] +Arguments: [sr_reason_sk#8], [r_reason_sk#12], Inner, BuildRight + +(17) CometProject +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10, r_reason_sk#12] +Arguments: [ss_customer_sk#2, act_sales#14], [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#10) THEN (cast((ss_quantity#4 - sr_return_quantity#10) as decimal(10,0)) * ss_sales_price#5) ELSE (cast(ss_quantity#4 as decimal(10,0)) * ss_sales_price#5) END AS act_sales#14] + +(18) ColumnarToRow [codegen id : 1] +Input [2]: [ss_customer_sk#2, act_sales#14] + +(19) HashAggregate [codegen id : 1] +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] + +(20) Exchange +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 2] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#19] +Results [2]: [ss_customer_sk#2, sum(act_sales#14)#19 AS sumsales#20] + +(22) TakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#20] +Arguments: 100, [sumsales#20 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8d70da4080 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q93.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [sumsales,ss_customer_sk] + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,sum,isEmpty] [sum(act_sales),sumsales,sum,isEmpty] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (1) + HashAggregate [ss_customer_sk,act_sales] [sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometProject [sr_return_quantity,ss_quantity,ss_sales_price] [ss_customer_sk,act_sales] + CometBroadcastHashJoin [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity,r_reason_sk] + CometProject [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + CometExchange [ss_item_sk,ss_ticket_number] #2 + CometProject [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometExchange [sr_item_sk,sr_ticket_number] #3 + CometProject [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + CometFilter [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + CometBroadcastExchange [r_reason_sk] #4 + CometProject [r_reason_sk] + CometFilter [r_reason_sk,r_reason_desc] + CometNativeScan: `spark_catalog`.`default`.`reason` [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/explain.txt new file mode 100644 index 0000000000..fde4e750e9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/explain.txt @@ -0,0 +1,219 @@ +== Physical Plan == +* HashAggregate (40) ++- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- * HashAggregate (36) + +- * ColumnarToRow (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (22) + : : +- CometBroadcastHashJoin (21) + : : :- CometSortMergeJoin (16) + : : : :- CometProject (11) + : : : : +- CometSortMergeJoin (10) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometSort (9) + : : : : +- CometExchange (8) + : : : : +- CometProject (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (6) + : : : +- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (12) + : : +- CometBroadcastExchange (20) + : : +- CometProject (19) + : : +- CometFilter (18) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (17) + : +- CometBroadcastExchange (26) + : +- CometProject (25) + : +- CometFilter (24) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (23) + +- CometBroadcastExchange (32) + +- CometProject (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`web_site` (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(3) CometProject +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(4) CometExchange +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_order_number#5 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Arguments: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] + +(7) CometProject +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Arguments: [ws_warehouse_sk#9, ws_order_number#10], [ws_warehouse_sk#9, ws_order_number#10] + +(8) CometExchange +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_warehouse_sk#9, ws_order_number#10], [ws_order_number#10 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#5], [ws_order_number#10], LeftSemi, NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) + +(11) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(12) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] +Arguments: [wr_order_number#12, wr_returned_date_sk#13] + +(13) CometProject +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] +Arguments: [wr_order_number#12], [wr_order_number#12] + +(14) CometExchange +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(15) CometSort +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12], [wr_order_number#12 ASC NULLS FIRST] + +(16) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [wr_order_number#12] +Arguments: [ws_order_number#5], [wr_order_number#12], LeftAnti + +(17) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14, d_date#15] + +(18) CometFilter +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 1999-02-01)) AND (d_date#15 <= 1999-04-02)) AND isnotnull(d_date_sk#14)) + +(19) CometProject +Input [2]: [d_date_sk#14, d_date#15] +Arguments: [d_date_sk#14], [d_date_sk#14] + +(20) CometBroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [d_date_sk#14] +Arguments: [ws_ship_date_sk#1], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#14] +Arguments: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(23) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16, ca_state#17] + +(24) CometFilter +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(25) CometProject +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16], [ca_address_sk#16] + +(26) CometBroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: [ca_address_sk#16] + +(27) CometBroadcastHashJoin +Left output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [ca_address_sk#16] +Arguments: [ws_ship_addr_sk#2], [ca_address_sk#16], Inner, BuildRight + +(28) CometProject +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] +Arguments: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(29) CometNativeScan: `spark_catalog`.`default`.`web_site` +Output [2]: [web_site_sk#18, web_company_name#19] +Arguments: [web_site_sk#18, web_company_name#19] + +(30) CometFilter +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri )) AND isnotnull(web_site_sk#18)) + +(31) CometProject +Input [2]: [web_site_sk#18, web_company_name#19] +Arguments: [web_site_sk#18], [web_site_sk#18] + +(32) CometBroadcastExchange +Input [1]: [web_site_sk#18] +Arguments: [web_site_sk#18] + +(33) CometBroadcastHashJoin +Left output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Right output [1]: [web_site_sk#18] +Arguments: [ws_web_site_sk#3], [web_site_sk#18], Inner, BuildRight + +(34) CometProject +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] +Arguments: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7], [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(35) ColumnarToRow [codegen id : 1] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(36) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] + +(37) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, sum#22, sum#23] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] + +(38) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#5, sum#22, sum#23] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] + +(39) Exchange +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 2] +Input [3]: [sum#22, sum#23, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#21,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/simplified.txt new file mode 100644 index 0000000000..aad98bbfcf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q94.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,web_site_sk] + CometProject [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ca_address_sk] + CometProject [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,d_date_sk] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,wr_order_number] + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_warehouse_sk] + CometSort [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometExchange [ws_order_number] #2 + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometFilter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometSort [ws_warehouse_sk,ws_order_number] + CometExchange [ws_order_number] #3 + CometProject [ws_warehouse_sk,ws_order_number] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometSort [wr_order_number] + CometExchange [wr_order_number] #4 + CometProject [wr_order_number] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_order_number,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [web_site_sk] #7 + CometProject [web_site_sk] + CometFilter [web_site_sk,web_company_name] + CometNativeScan: `spark_catalog`.`default`.`web_site` [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/explain.txt new file mode 100644 index 0000000000..164424d188 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/explain.txt @@ -0,0 +1,284 @@ +== Physical Plan == +* HashAggregate (53) ++- Exchange (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * ColumnarToRow (48) + +- CometProject (47) + +- CometBroadcastHashJoin (46) + :- CometProject (41) + : +- CometBroadcastHashJoin (40) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometSortMergeJoin (29) + : : : :- CometSortMergeJoin (15) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : : +- CometProject (14) + : : : : +- CometSortMergeJoin (13) + : : : : :- CometSort (10) + : : : : : +- CometExchange (9) + : : : : : +- CometProject (8) + : : : : : +- CometFilter (7) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (6) + : : : : +- CometSort (12) + : : : : +- ReusedExchange (11) + : : : +- CometProject (28) + : : : +- CometSortMergeJoin (27) + : : : :- CometSort (20) + : : : : +- CometExchange (19) + : : : : +- CometProject (18) + : : : : +- CometFilter (17) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (16) + : : : +- CometProject (26) + : : : +- CometSortMergeJoin (25) + : : : :- CometSort (22) + : : : : +- ReusedExchange (21) + : : : +- CometSort (24) + : : : +- ReusedExchange (23) + : : +- CometBroadcastExchange (33) + : : +- CometProject (32) + : : +- CometFilter (31) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (30) + : +- CometBroadcastExchange (39) + : +- CometProject (38) + : +- CometFilter (37) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (36) + +- CometBroadcastExchange (45) + +- CometProject (44) + +- CometFilter (43) + +- CometNativeScan: `spark_catalog`.`default`.`web_site` (42) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(3) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(4) CometExchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_order_number#4 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Arguments: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] + +(7) CometFilter +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(8) CometProject +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Arguments: [ws_warehouse_sk#8, ws_order_number#9], [ws_warehouse_sk#8, ws_order_number#9] + +(9) CometExchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_warehouse_sk#8, ws_order_number#9], [ws_order_number#9 ASC NULLS FIRST] + +(11) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(12) CometSort +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_warehouse_sk#11, ws_order_number#12], [ws_order_number#12 ASC NULLS FIRST] + +(13) CometSortMergeJoin +Left output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Right output [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#9], [ws_order_number#12], Inner, NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(14) CometProject +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#9], [ws_order_number#9] + +(15) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [ws_order_number#9] +Arguments: [ws_order_number#4], [ws_order_number#9], LeftSemi + +(16) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] +Arguments: [wr_order_number#13, wr_returned_date_sk#14] + +(17) CometFilter +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) + +(18) CometProject +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Arguments: [wr_order_number#13], [wr_order_number#13] + +(19) CometExchange +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13], [wr_order_number#13 ASC NULLS FIRST] + +(21) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#15, ws_order_number#16] + +(22) CometSort +Input [2]: [ws_warehouse_sk#15, ws_order_number#16] +Arguments: [ws_warehouse_sk#15, ws_order_number#16], [ws_order_number#16 ASC NULLS FIRST] + +(23) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#17, ws_order_number#18] + +(24) CometSort +Input [2]: [ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_warehouse_sk#17, ws_order_number#18], [ws_order_number#18 ASC NULLS FIRST] + +(25) CometSortMergeJoin +Left output [2]: [ws_warehouse_sk#15, ws_order_number#16] +Right output [2]: [ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_order_number#16], [ws_order_number#18], Inner, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#17) + +(26) CometProject +Input [4]: [ws_warehouse_sk#15, ws_order_number#16, ws_warehouse_sk#17, ws_order_number#18] +Arguments: [ws_order_number#16], [ws_order_number#16] + +(27) CometSortMergeJoin +Left output [1]: [wr_order_number#13] +Right output [1]: [ws_order_number#16] +Arguments: [wr_order_number#13], [ws_order_number#16], Inner + +(28) CometProject +Input [2]: [wr_order_number#13, ws_order_number#16] +Arguments: [wr_order_number#13], [wr_order_number#13] + +(29) CometSortMergeJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [wr_order_number#13] +Arguments: [ws_order_number#4], [wr_order_number#13], LeftSemi + +(30) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#19, d_date#20] +Arguments: [d_date_sk#19, d_date#20] + +(31) CometFilter +Input [2]: [d_date_sk#19, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-01)) AND (d_date#20 <= 1999-04-02)) AND isnotnull(d_date_sk#19)) + +(32) CometProject +Input [2]: [d_date_sk#19, d_date#20] +Arguments: [d_date_sk#19], [d_date_sk#19] + +(33) CometBroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: [d_date_sk#19] + +(34) CometBroadcastHashJoin +Left output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [d_date_sk#19] +Arguments: [ws_ship_date_sk#1], [d_date_sk#19], Inner, BuildRight + +(35) CometProject +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#19] +Arguments: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(36) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21, ca_state#22] + +(37) CometFilter +Input [2]: [ca_address_sk#21, ca_state#22] +Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) + +(38) CometProject +Input [2]: [ca_address_sk#21, ca_state#22] +Arguments: [ca_address_sk#21], [ca_address_sk#21] + +(39) CometBroadcastExchange +Input [1]: [ca_address_sk#21] +Arguments: [ca_address_sk#21] + +(40) CometBroadcastHashJoin +Left output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [ca_address_sk#21] +Arguments: [ws_ship_addr_sk#2], [ca_address_sk#21], Inner, BuildRight + +(41) CometProject +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] +Arguments: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(42) CometNativeScan: `spark_catalog`.`default`.`web_site` +Output [2]: [web_site_sk#23, web_company_name#24] +Arguments: [web_site_sk#23, web_company_name#24] + +(43) CometFilter +Input [2]: [web_site_sk#23, web_company_name#24] +Condition : ((isnotnull(web_company_name#24) AND (web_company_name#24 = pri )) AND isnotnull(web_site_sk#23)) + +(44) CometProject +Input [2]: [web_site_sk#23, web_company_name#24] +Arguments: [web_site_sk#23], [web_site_sk#23] + +(45) CometBroadcastExchange +Input [1]: [web_site_sk#23] +Arguments: [web_site_sk#23] + +(46) CometBroadcastHashJoin +Left output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Right output [1]: [web_site_sk#23] +Arguments: [ws_web_site_sk#3], [web_site_sk#23], Inner, BuildRight + +(47) CometProject +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#23] +Arguments: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6], [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(48) ColumnarToRow [codegen id : 1] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(49) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#25, sum(UnscaledValue(ws_net_profit#6))#26] +Results [3]: [ws_order_number#4, sum#27, sum#28] + +(50) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, sum#27, sum#28] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#25, sum(UnscaledValue(ws_net_profit#6))#26] +Results [3]: [ws_order_number#4, sum#27, sum#28] + +(51) HashAggregate [codegen id : 1] +Input [3]: [ws_order_number#4, sum#27, sum#28] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#25, sum(UnscaledValue(ws_net_profit#6))#26, count(ws_order_number#4)#29] +Results [3]: [sum#27, sum#28, count#30] + +(52) Exchange +Input [3]: [sum#27, sum#28, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(53) HashAggregate [codegen id : 2] +Input [3]: [sum#27, sum#28, count#30] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#25, sum(UnscaledValue(ws_net_profit#6))#26, count(ws_order_number#4)#29] +Results [3]: [count(ws_order_number#4)#29 AS order count #31, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#25,17,2) AS total shipping cost #32, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#26,17,2) AS total net profit #33] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/simplified.txt new file mode 100644 index 0000000000..88077488d0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q95.native_datafusion/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,web_site_sk] + CometProject [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ca_address_sk] + CometProject [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometBroadcastHashJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,d_date_sk] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,wr_order_number] + CometSortMergeJoin [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_order_number] + CometSort [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometExchange [ws_order_number] #2 + CometProject [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + CometFilter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + CometProject [ws_order_number] + CometSortMergeJoin [ws_warehouse_sk,ws_order_number,ws_warehouse_sk,ws_order_number] + CometSort [ws_warehouse_sk,ws_order_number] + CometExchange [ws_order_number] #3 + CometProject [ws_warehouse_sk,ws_order_number] + CometFilter [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometProject [wr_order_number] + CometSortMergeJoin [wr_order_number,ws_order_number] + CometSort [wr_order_number] + CometExchange [wr_order_number] #4 + CometProject [wr_order_number] + CometFilter [wr_order_number,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_order_number,wr_returned_date_sk] + CometProject [ws_order_number] + CometSortMergeJoin [ws_warehouse_sk,ws_order_number,ws_warehouse_sk,ws_order_number] + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometSort [ws_warehouse_sk,ws_order_number] + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [ca_address_sk] #6 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [web_site_sk] #7 + CometProject [web_site_sk] + CometFilter [web_site_sk,web_company_name] + CometNativeScan: `spark_catalog`.`default`.`web_site` [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/explain.txt new file mode 100644 index 0000000000..16424de5f3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/explain.txt @@ -0,0 +1,131 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometHashAggregate (24) + +- CometExchange (23) + +- CometHashAggregate (22) + +- CometProject (21) + +- CometBroadcastHashJoin (20) + :- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (7) + : : +- CometProject (6) + : : +- CometFilter (5) + : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (4) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`time_dim` (10) + +- CometBroadcastExchange (19) + +- CometProject (18) + +- CometFilter (17) + +- CometNativeScan: `spark_catalog`.`default`.`store` (16) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(3) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Arguments: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3], [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(4) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5, hd_dep_count#6] + +(5) CometFilter +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 7)) AND isnotnull(hd_demo_sk#5)) + +(6) CometProject +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Arguments: [hd_demo_sk#5], [hd_demo_sk#5] + +(7) CometBroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: [hd_demo_sk#5] + +(8) CometBroadcastHashJoin +Left output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Right output [1]: [hd_demo_sk#5] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#5], Inner, BuildRight + +(9) CometProject +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] +Arguments: [ss_sold_time_sk#1, ss_store_sk#3], [ss_sold_time_sk#1, ss_store_sk#3] + +(10) CometNativeScan: `spark_catalog`.`default`.`time_dim` +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Arguments: [t_time_sk#7, t_hour#8, t_minute#9] + +(11) CometFilter +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(12) CometProject +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Arguments: [t_time_sk#7], [t_time_sk#7] + +(13) CometBroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: [t_time_sk#7] + +(14) CometBroadcastHashJoin +Left output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Right output [1]: [t_time_sk#7] +Arguments: [ss_sold_time_sk#1], [t_time_sk#7], Inner, BuildRight + +(15) CometProject +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] +Arguments: [ss_store_sk#3], [ss_store_sk#3] + +(16) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#10, s_store_name#11] +Arguments: [s_store_sk#10, s_store_name#11] + +(17) CometFilter +Input [2]: [s_store_sk#10, s_store_name#11] +Condition : ((isnotnull(s_store_name#11) AND (s_store_name#11 = ese)) AND isnotnull(s_store_sk#10)) + +(18) CometProject +Input [2]: [s_store_sk#10, s_store_name#11] +Arguments: [s_store_sk#10], [s_store_sk#10] + +(19) CometBroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: [s_store_sk#10] + +(20) CometBroadcastHashJoin +Left output [1]: [ss_store_sk#3] +Right output [1]: [s_store_sk#10] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(21) CometProject +Input [2]: [ss_store_sk#3, s_store_sk#10] + +(22) CometHashAggregate +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] + +(23) CometExchange +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(24) CometHashAggregate +Input [1]: [count#12] +Keys: [] +Functions [1]: [count(1)] + +(25) ColumnarToRow [codegen id : 1] +Input [1]: [count(1)#13] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1996d860c8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q96.native_datafusion/simplified.txt @@ -0,0 +1,27 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [count(1),count,count(1)] + CometExchange #1 + CometHashAggregate [count] + CometProject + CometBroadcastHashJoin [ss_store_sk,s_store_sk] + CometProject [ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_store_sk,t_time_sk] + CometProject [ss_sold_time_sk,ss_store_sk] + CometBroadcastHashJoin [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,hd_demo_sk] + CometProject [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + CometFilter [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + CometBroadcastExchange [hd_demo_sk] #2 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_dep_count] + CometBroadcastExchange [t_time_sk] #3 + CometProject [t_time_sk] + CometFilter [t_time_sk,t_hour,t_minute] + CometNativeScan: `spark_catalog`.`default`.`time_dim` [t_time_sk,t_hour,t_minute] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_store_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/explain.txt new file mode 100644 index 0000000000..d0c03cb144 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/explain.txt @@ -0,0 +1,139 @@ +== Physical Plan == +* HashAggregate (25) ++- Exchange (24) + +- * HashAggregate (23) + +- * ColumnarToRow (22) + +- CometProject (21) + +- CometSortMergeJoin (20) + :- CometSort (11) + : +- CometHashAggregate (10) + : +- CometExchange (9) + : +- CometHashAggregate (8) + : +- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometProject (4) + : +- CometFilter (3) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (2) + +- CometSort (19) + +- CometHashAggregate (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometProject (15) + +- CometBroadcastHashJoin (14) + :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (12) + +- ReusedExchange (13) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] + +(2) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(3) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(4) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(5) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_item_sk#1, ss_customer_sk#2], [ss_item_sk#1, ss_customer_sk#2] + +(8) CometHashAggregate +Input [2]: [ss_item_sk#1, ss_customer_sk#2] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] + +(9) CometExchange +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometHashAggregate +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] + +(11) CometSort +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6, item_sk#7], [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST] + +(12) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Arguments: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] + +(13) ReusedExchange [Reuses operator id: 5] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [cs_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] +Arguments: [cs_bill_customer_sk#8, cs_item_sk#9], [cs_bill_customer_sk#8, cs_item_sk#9] + +(16) CometHashAggregate +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] + +(17) CometExchange +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometHashAggregate +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] + +(19) CometSort +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12, item_sk#13], [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST] + +(20) CometSortMergeJoin +Left output [2]: [customer_sk#6, item_sk#7] +Right output [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#6, item_sk#7], [customer_sk#12, item_sk#13], FullOuter + +(21) CometProject +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] +Arguments: [customer_sk#6, customer_sk#12], [customer_sk#6, customer_sk#12] + +(22) ColumnarToRow [codegen id : 1] +Input [2]: [customer_sk#6, customer_sk#12] + +(23) HashAggregate [codegen id : 1] +Input [2]: [customer_sk#6, customer_sk#12] +Keys: [] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [3]: [sum#17, sum#18, sum#19] + +(24) Exchange +Input [3]: [sum#17, sum#18, sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(25) HashAggregate [codegen id : 2] +Input [3]: [sum#17, sum#18, sum#19] +Keys: [] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20 AS store_only#23, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21 AS catalog_only#24, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22 AS store_and_catalog#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2f53fb71f6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q97.native_datafusion/simplified.txt @@ -0,0 +1,29 @@ +WholeStageCodegen (2) + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [customer_sk,customer_sk] + CometSortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + CometSort [customer_sk,item_sk] + CometHashAggregate [customer_sk,item_sk,ss_customer_sk,ss_item_sk] + CometExchange [ss_customer_sk,ss_item_sk] #2 + CometHashAggregate [ss_customer_sk,ss_item_sk] + CometProject [ss_item_sk,ss_customer_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometSort [customer_sk,item_sk] + CometHashAggregate [customer_sk,item_sk,cs_bill_customer_sk,cs_item_sk] + CometExchange [cs_bill_customer_sk,cs_item_sk] #4 + CometHashAggregate [cs_bill_customer_sk,cs_item_sk] + CometProject [cs_bill_customer_sk,cs_item_sk] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/explain.txt new file mode 100644 index 0000000000..3097726fc5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/explain.txt @@ -0,0 +1,134 @@ +== Physical Plan == +* ColumnarToRow (25) ++- CometProject (24) + +- CometSort (23) + +- CometColumnarExchange (22) + +- * Project (21) + +- Window (20) + +- * Sort (19) + +- Exchange (18) + +- * HashAggregate (17) + +- Exchange (16) + +- * HashAggregate (15) + +- * ColumnarToRow (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) ColumnarToRow [codegen id : 1] +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(15) HashAggregate [codegen id : 1] +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(16) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(17) HashAggregate [codegen id : 2] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(18) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(19) Sort [codegen id : 3] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(20) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(21) Project [codegen id : 4] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(22) CometColumnarExchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(23) CometSort +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5], [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST] + +(24) CometProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + +(25) ColumnarToRow [codegen id : 5] +Input [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/simplified.txt new file mode 100644 index 0000000000..911dc29722 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q98.native_datafusion/simplified.txt @@ -0,0 +1,35 @@ +WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometProject [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + CometSort [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio,i_item_id] + CometColumnarExchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (4) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (3) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (2) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + WholeStageCodegen (1) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] + ColumnarToRow + InputAdapter + CometProject [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #4 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/explain.txt new file mode 100644 index 0000000000..cc6adee3c7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/explain.txt @@ -0,0 +1,130 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * ColumnarToRow (20) + +- CometProject (19) + +- CometBroadcastHashJoin (18) + :- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (10) + : : +- CometBroadcastHashJoin (9) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (3) + : : +- ReusedExchange (8) + : +- ReusedExchange (11) + +- CometBroadcastExchange (17) + +- CometProject (16) + +- CometFilter (15) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (14) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#4) AND isnotnull(cs_ship_mode_sk#3)) AND isnotnull(cs_call_center_sk#2)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(4) CometFilter +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(5) CometBroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Right output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [cs_warehouse_sk#4], [w_warehouse_sk#6], Inner, BuildRight + +(7) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7], [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] + +(8) ReusedExchange [Reuses operator id: 5] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] + +(9) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] +Right output [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: [cs_ship_mode_sk#3], [sm_ship_mode_sk#8], Inner, BuildRight + +(10) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] +Arguments: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9], [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] + +(11) ReusedExchange [Reuses operator id: 5] +Output [2]: [cc_call_center_sk#10, cc_name#11] + +(12) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Right output [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: [cs_call_center_sk#2], [cc_call_center_sk#10], Inner, BuildRight + +(13) CometProject +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_call_center_sk#10, cc_name#11] +Arguments: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11], [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12, d_month_seq#13] + +(15) CometFilter +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(16) CometProject +Input [2]: [d_date_sk#12, d_month_seq#13] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(17) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(18) CometBroadcastHashJoin +Left output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] +Right output [1]: [d_date_sk#12] +Arguments: [cs_ship_date_sk#1], [d_date_sk#12], Inner, BuildRight + +(19) CometProject +Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11, d_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14], [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] + +(20) ColumnarToRow [codegen id : 1] +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] + +(21) HashAggregate [codegen id : 1] +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] + +(22) Exchange +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(23) HashAggregate [codegen id : 2] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] + +(24) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/simplified.txt new file mode 100644 index 0000000000..626d31bc0b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q99.native_datafusion/simplified.txt @@ -0,0 +1,28 @@ +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (2) + HashAggregate [_groupingexpression,sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [_groupingexpression,sm_type,cc_name] #1 + WholeStageCodegen (1) + HashAggregate [_groupingexpression,sm_type,cc_name,cs_ship_date_sk,cs_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometProject [w_warehouse_name] [cs_ship_date_sk,cs_sold_date_sk,sm_type,cc_name,_groupingexpression] + CometBroadcastHashJoin [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name,d_date_sk] + CometProject [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_call_center_sk,cc_name] + CometProject [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name,sm_ship_mode_sk,sm_type] + CometProject [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk,w_warehouse_sk,w_warehouse_name] + CometFilter [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #2 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [sm_ship_mode_sk,sm_type] #2 + ReusedExchange [cc_call_center_sk,cc_name] #2 + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/explain.txt new file mode 100644 index 0000000000..2836e4c129 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/explain.txt @@ -0,0 +1,262 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Filter (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_returns (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store_returns (15) + : : +- ReusedExchange (18) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.store (31) + +- BroadcastExchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet spark_catalog.default.customer (38) + + +(1) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#4)] +PushedFilters: [IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] + +(3) Filter [codegen id : 2] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : (isnotnull(sr_store_sk#2) AND isnotnull(sr_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [sr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] + +(12) Exchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(13) HashAggregate [codegen id : 9] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#10, sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] + +(14) Filter [codegen id : 9] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(15) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#4)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] + +(17) Filter [codegen id : 4] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : isnotnull(sr_store_sk#2) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(19) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 4] +Output [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] + +(21) HashAggregate [codegen id : 4] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] + +(22) Exchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) HashAggregate [codegen id : 5] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [2]: [sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] + +(24) HashAggregate [codegen id : 5] +Input [2]: [ctr_store_sk#11, ctr_total_return#12] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [partial_avg(ctr_total_return#12)] +Aggregate Attributes [2]: [sum#15, count#16] +Results [3]: [ctr_store_sk#11, sum#17, count#18] + +(25) Exchange +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Arguments: hashpartitioning(ctr_store_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 6] +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [avg(ctr_total_return#12)] +Aggregate Attributes [1]: [avg(ctr_total_return#12)#19] +Results [2]: [(avg(ctr_total_return#12)#19 * 1.2) AS (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11 AS ctr_store_sk#11#21] + +(27) Filter [codegen id : 6] +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#20) + +(28) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=5] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [ctr_store_sk#11#21] +Join type: Inner +Join condition: (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#20) + +(30) Project [codegen id : 9] +Output [2]: [ctr_customer_sk#10, ctr_store_sk#11] +Input [5]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] + +(31) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#22, s_state#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#22, s_state#23] + +(33) Filter [codegen id : 7] +Input [2]: [s_store_sk#22, s_state#23] +Condition : ((isnotnull(s_state#23) AND (s_state#23 = TN)) AND isnotnull(s_store_sk#22)) + +(34) Project [codegen id : 7] +Output [1]: [s_store_sk#22] +Input [2]: [s_store_sk#22, s_state#23] + +(35) BroadcastExchange +Input [1]: [s_store_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [s_store_sk#22] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 9] +Output [1]: [ctr_customer_sk#10] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, s_store_sk#22] + +(38) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#24, c_customer_id#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 8] +Input [2]: [c_customer_sk#24, c_customer_id#25] + +(40) Filter [codegen id : 8] +Input [2]: [c_customer_sk#24, c_customer_id#25] +Condition : isnotnull(c_customer_sk#24) + +(41) BroadcastExchange +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_customer_sk#10] +Right keys [1]: [c_customer_sk#24] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 9] +Output [1]: [c_customer_id#25] +Input [3]: [ctr_customer_sk#10, c_customer_sk#24, c_customer_id#25] + +(44) TakeOrderedAndProject +Input [1]: [c_customer_id#25] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST], [c_customer_id#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0e1c105b70 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_datafusion/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen (9) + Project [c_customer_id] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [ctr_store_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_customer_sk,ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk,sr_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (6) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_store_sk,sum,count] + InputAdapter + Exchange [ctr_store_sk] #4 + WholeStageCodegen (5) + HashAggregate [ctr_store_sk,ctr_total_return] [sum,count,sum,count] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #5 + WholeStageCodegen (4) + HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2836e4c129 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/explain.txt @@ -0,0 +1,262 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Filter (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_returns (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * HashAggregate (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store_returns (15) + : : +- ReusedExchange (18) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.store (31) + +- BroadcastExchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet spark_catalog.default.customer (38) + + +(1) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#4)] +PushedFilters: [IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] + +(3) Filter [codegen id : 2] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : (isnotnull(sr_store_sk#2) AND isnotnull(sr_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [sr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] + +(12) Exchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(13) HashAggregate [codegen id : 9] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#10, sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] + +(14) Filter [codegen id : 9] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) + +(15) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#4)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] + +(17) Filter [codegen id : 4] +Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] +Condition : isnotnull(sr_store_sk#2) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(19) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 4] +Output [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4, d_date_sk#5] + +(21) HashAggregate [codegen id : 4] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] + +(22) Exchange +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) HashAggregate [codegen id : 5] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] +Keys [2]: [sr_customer_sk#1, sr_store_sk#2] +Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [2]: [sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] + +(24) HashAggregate [codegen id : 5] +Input [2]: [ctr_store_sk#11, ctr_total_return#12] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [partial_avg(ctr_total_return#12)] +Aggregate Attributes [2]: [sum#15, count#16] +Results [3]: [ctr_store_sk#11, sum#17, count#18] + +(25) Exchange +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Arguments: hashpartitioning(ctr_store_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 6] +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [avg(ctr_total_return#12)] +Aggregate Attributes [1]: [avg(ctr_total_return#12)#19] +Results [2]: [(avg(ctr_total_return#12)#19 * 1.2) AS (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11 AS ctr_store_sk#11#21] + +(27) Filter [codegen id : 6] +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#20) + +(28) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=5] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [ctr_store_sk#11#21] +Join type: Inner +Join condition: (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#20) + +(30) Project [codegen id : 9] +Output [2]: [ctr_customer_sk#10, ctr_store_sk#11] +Input [5]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] + +(31) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#22, s_state#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#22, s_state#23] + +(33) Filter [codegen id : 7] +Input [2]: [s_store_sk#22, s_state#23] +Condition : ((isnotnull(s_state#23) AND (s_state#23 = TN)) AND isnotnull(s_store_sk#22)) + +(34) Project [codegen id : 7] +Output [1]: [s_store_sk#22] +Input [2]: [s_store_sk#22, s_state#23] + +(35) BroadcastExchange +Input [1]: [s_store_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [s_store_sk#22] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 9] +Output [1]: [ctr_customer_sk#10] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, s_store_sk#22] + +(38) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#24, c_customer_id#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 8] +Input [2]: [c_customer_sk#24, c_customer_id#25] + +(40) Filter [codegen id : 8] +Input [2]: [c_customer_sk#24, c_customer_id#25] +Condition : isnotnull(c_customer_sk#24) + +(41) BroadcastExchange +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ctr_customer_sk#10] +Right keys [1]: [c_customer_sk#24] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 9] +Output [1]: [c_customer_id#25] +Input [3]: [ctr_customer_sk#10, c_customer_sk#24, c_customer_id#25] + +(44) TakeOrderedAndProject +Input [1]: [c_customer_id#25] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST], [c_customer_id#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..0e1c105b70 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.native_iceberg_compat/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen (9) + Project [c_customer_id] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [ctr_store_sk,ctr_store_sk,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_customer_sk,ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk,sr_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (6) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_store_sk,sum,count] + InputAdapter + Exchange [ctr_store_sk] #4 + WholeStageCodegen (5) + HashAggregate [ctr_store_sk,ctr_total_return] [sum,count,sum,count] + HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #5 + WholeStageCodegen (4) + HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_customer_sk,sr_store_sk,sr_return_amt,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/explain.txt new file mode 100644 index 0000000000..10320c7c3c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/explain.txt @@ -0,0 +1,272 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (30) + : : +- * Filter (29) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (28) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.customer_address (31) + +- BroadcastExchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet spark_catalog.default.customer_demographics (38) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(10) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#13] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#11] +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#16] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#14] +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(29) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(30) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_county#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Dona Ana County,Jefferson County,La Porte County,Rush County,Toole County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] + +(33) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#17)) + +(34) Project [codegen id : 7] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_county#18] + +(35) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17] + +(38) Scan parquet spark_catalog.default.customer_demographics +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(40) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(41) BroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 9] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(44) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] + +(45) Exchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(46) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] + +(47) TakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bd3d7f63ba --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_county,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..10320c7c3c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/explain.txt @@ -0,0 +1,272 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (30) + : : +- * Filter (29) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (28) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.customer_address (31) + +- BroadcastExchange (41) + +- * Filter (40) + +- * ColumnarToRow (39) + +- Scan parquet spark_catalog.default.customer_demographics (38) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(10) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#13] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#11] +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#16] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#14] +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(29) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(30) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_county#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Dona Ana County,Jefferson County,La Porte County,Rush County,Toole County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] + +(33) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#17)) + +(34) Project [codegen id : 7] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_county#18] + +(35) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17] + +(38) Scan parquet spark_catalog.default.customer_demographics +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(40) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(41) BroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 9] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(44) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] + +(45) Exchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(46) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] + +(47) TakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bd3d7f63ba --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_county,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/explain.txt new file mode 100644 index 0000000000..994ad18545 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (54) + : +- * Filter (53) + : +- * HashAggregate (52) + : +- Exchange (51) + : +- * HashAggregate (50) + : +- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet spark_catalog.default.customer (38) + : : +- BroadcastExchange (44) + : : +- * Filter (43) + : : +- * ColumnarToRow (42) + : : +- Scan parquet spark_catalog.default.web_sales (41) + : +- ReusedExchange (47) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet spark_catalog.default.customer (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(7) BroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum#15] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17] +Results [2]: [c_customer_id#2 AS customer_id#18, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17,18,2) AS year_total#19] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#18, year_total#19] +Condition : (isnotnull(year_total#19) AND (year_total#19 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Condition : (isnotnull(c_customer_sk#20) AND isnotnull(c_customer_id#21)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [4]: [ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#20] +Right keys [1]: [ss_customer_sk#28] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Input [12]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#32, d_year#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] +Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2002)) AND isnotnull(d_date_sk#32)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#32, d_year#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#31] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Input [12]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31, d_date_sk#32, d_year#33] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum#34] +Results [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] + +(33) Exchange +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Arguments: hashpartitioning(c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17] +Results [3]: [c_customer_id#21 AS customer_id#36, c_preferred_cust_flag#24 AS customer_preferred_cust_flag#37, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17,18,2) AS year_total#38] + +(35) BroadcastExchange +Input [3]: [customer_id#36, customer_preferred_cust_flag#37, year_total#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#36] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 16] +Output [4]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38] +Input [5]: [customer_id#18, year_total#19, customer_id#36, customer_preferred_cust_flag#37, year_total#38] + +(38) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] + +(40) Filter [codegen id : 10] +Input [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Condition : (isnotnull(c_customer_sk#39) AND isnotnull(c_customer_id#40)) + +(41) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#50)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] + +(43) Filter [codegen id : 8] +Input [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Condition : isnotnull(ws_bill_customer_sk#47) + +(44) BroadcastExchange +Input [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(45) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#39] +Right keys [1]: [ws_bill_customer_sk#47] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 10] +Output [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Input [12]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] + +(47) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#51, d_year#52] + +(48) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#50] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(49) Project [codegen id : 10] +Output [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] +Input [12]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] + +(50) HashAggregate [codegen id : 10] +Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] +Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))] +Aggregate Attributes [1]: [sum#53] +Results [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] + +(51) Exchange +Input [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(52) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] +Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))#55] +Results [2]: [c_customer_id#40 AS customer_id#56, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))#55,18,2) AS year_total#57] + +(53) Filter [codegen id : 11] +Input [2]: [customer_id#56, year_total#57] +Condition : (isnotnull(year_total#57) AND (year_total#57 > 0.00)) + +(54) BroadcastExchange +Input [2]: [customer_id#56, year_total#57] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(55) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#56] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 16] +Output [5]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, year_total#57] +Input [6]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, customer_id#56, year_total#57] + +(57) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] + +(59) Filter [codegen id : 14] +Input [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Condition : (isnotnull(c_customer_sk#58) AND isnotnull(c_customer_id#59)) + +(60) ReusedExchange [Reuses operator id: 44] +Output [4]: [ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#58] +Right keys [1]: [ws_bill_customer_sk#66] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 14] +Output [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] +Input [12]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#70, d_year#71] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#69] +Right keys [1]: [d_date_sk#70] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 14] +Output [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] +Input [12]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69, d_date_sk#70, d_year#71] + +(66) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] +Keys [8]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))] +Aggregate Attributes [1]: [sum#72] +Results [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] + +(67) Exchange +Input [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] +Arguments: hashpartitioning(c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] +Keys [8]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))#55] +Results [2]: [c_customer_id#59 AS customer_id#74, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))#55,18,2) AS year_total#75] + +(69) BroadcastExchange +Input [2]: [customer_id#74, year_total#75] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(70) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#74] +Join type: Inner +Join condition: (CASE WHEN (year_total#57 > 0.00) THEN (year_total#75 / year_total#57) END > CASE WHEN (year_total#19 > 0.00) THEN (year_total#38 / year_total#19) END) + +(71) Project [codegen id : 16] +Output [1]: [customer_preferred_cust_flag#37] +Input [7]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, year_total#57, customer_id#74, year_total#75] + +(72) TakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#37] +Arguments: 100, [customer_preferred_cust_flag#37 ASC NULLS FIRST], [customer_preferred_cust_flag#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/simplified.txt new file mode 100644 index 0000000000..944e89eee5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_datafusion/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [customer_preferred_cust_flag] + WholeStageCodegen (16) + Project [customer_preferred_cust_flag] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,customer_preferred_cust_flag,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..994ad18545 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (54) + : +- * Filter (53) + : +- * HashAggregate (52) + : +- Exchange (51) + : +- * HashAggregate (50) + : +- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet spark_catalog.default.customer (38) + : : +- BroadcastExchange (44) + : : +- * Filter (43) + : : +- * ColumnarToRow (42) + : : +- Scan parquet spark_catalog.default.web_sales (41) + : +- ReusedExchange (47) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet spark_catalog.default.customer (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(7) BroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum#15] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17] +Results [2]: [c_customer_id#2 AS customer_id#18, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17,18,2) AS year_total#19] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#18, year_total#19] +Condition : (isnotnull(year_total#19) AND (year_total#19 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Condition : (isnotnull(c_customer_sk#20) AND isnotnull(c_customer_id#21)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [4]: [ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#20] +Right keys [1]: [ss_customer_sk#28] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Input [12]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#32, d_year#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] +Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2002)) AND isnotnull(d_date_sk#32)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#32, d_year#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#31] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Input [12]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31, d_date_sk#32, d_year#33] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum#34] +Results [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] + +(33) Exchange +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Arguments: hashpartitioning(c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17] +Results [3]: [c_customer_id#21 AS customer_id#36, c_preferred_cust_flag#24 AS customer_preferred_cust_flag#37, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17,18,2) AS year_total#38] + +(35) BroadcastExchange +Input [3]: [customer_id#36, customer_preferred_cust_flag#37, year_total#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#36] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 16] +Output [4]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38] +Input [5]: [customer_id#18, year_total#19, customer_id#36, customer_preferred_cust_flag#37, year_total#38] + +(38) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] + +(40) Filter [codegen id : 10] +Input [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Condition : (isnotnull(c_customer_sk#39) AND isnotnull(c_customer_id#40)) + +(41) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#50)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] + +(43) Filter [codegen id : 8] +Input [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Condition : isnotnull(ws_bill_customer_sk#47) + +(44) BroadcastExchange +Input [4]: [ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(45) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#39] +Right keys [1]: [ws_bill_customer_sk#47] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 10] +Output [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] +Input [12]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_bill_customer_sk#47, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50] + +(47) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#51, d_year#52] + +(48) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#50] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(49) Project [codegen id : 10] +Output [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] +Input [12]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] + +(50) HashAggregate [codegen id : 10] +Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ws_ext_discount_amt#48, ws_ext_list_price#49, d_year#52] +Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))] +Aggregate Attributes [1]: [sum#53] +Results [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] + +(51) Exchange +Input [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(52) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52, sum#54] +Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#52] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))#55] +Results [2]: [c_customer_id#40 AS customer_id#56, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#49 - ws_ext_discount_amt#48)))#55,18,2) AS year_total#57] + +(53) Filter [codegen id : 11] +Input [2]: [customer_id#56, year_total#57] +Condition : (isnotnull(year_total#57) AND (year_total#57 > 0.00)) + +(54) BroadcastExchange +Input [2]: [customer_id#56, year_total#57] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(55) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#56] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 16] +Output [5]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, year_total#57] +Input [6]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, customer_id#56, year_total#57] + +(57) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] + +(59) Filter [codegen id : 14] +Input [8]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65] +Condition : (isnotnull(c_customer_sk#58) AND isnotnull(c_customer_id#59)) + +(60) ReusedExchange [Reuses operator id: 44] +Output [4]: [ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#58] +Right keys [1]: [ws_bill_customer_sk#66] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 14] +Output [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] +Input [12]: [c_customer_sk#58, c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_bill_customer_sk#66, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#70, d_year#71] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#69] +Right keys [1]: [d_date_sk#70] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 14] +Output [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] +Input [12]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, ws_sold_date_sk#69, d_date_sk#70, d_year#71] + +(66) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, ws_ext_discount_amt#67, ws_ext_list_price#68, d_year#71] +Keys [8]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))] +Aggregate Attributes [1]: [sum#72] +Results [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] + +(67) Exchange +Input [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] +Arguments: hashpartitioning(c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71, sum#73] +Keys [8]: [c_customer_id#59, c_first_name#60, c_last_name#61, c_preferred_cust_flag#62, c_birth_country#63, c_login#64, c_email_address#65, d_year#71] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))#55] +Results [2]: [c_customer_id#59 AS customer_id#74, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#68 - ws_ext_discount_amt#67)))#55,18,2) AS year_total#75] + +(69) BroadcastExchange +Input [2]: [customer_id#74, year_total#75] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(70) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#74] +Join type: Inner +Join condition: (CASE WHEN (year_total#57 > 0.00) THEN (year_total#75 / year_total#57) END > CASE WHEN (year_total#19 > 0.00) THEN (year_total#38 / year_total#19) END) + +(71) Project [codegen id : 16] +Output [1]: [customer_preferred_cust_flag#37] +Input [7]: [customer_id#18, year_total#19, customer_preferred_cust_flag#37, year_total#38, year_total#57, customer_id#74, year_total#75] + +(72) TakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#37] +Arguments: 100, [customer_preferred_cust_flag#37 ASC NULLS FIRST], [customer_preferred_cust_flag#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..944e89eee5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.native_iceberg_compat/simplified.txt @@ -0,0 +1,107 @@ +TakeOrderedAndProject [customer_preferred_cust_flag] + WholeStageCodegen (16) + Project [customer_preferred_cust_flag] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,customer_preferred_cust_flag,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/explain.txt new file mode 100644 index 0000000000..d70125ed20 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(20) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3bbeaadc8f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_datafusion/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d70125ed20 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(20) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3bbeaadc8f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.native_iceberg_compat/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/explain.txt new file mode 100644 index 0000000000..fcf177544f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/explain.txt @@ -0,0 +1,222 @@ +== Physical Plan == +* HashAggregate (38) ++- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.customer_address (10) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.date_dim (17) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet spark_catalog.default.customer_demographics (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet spark_catalog.default.household_demographics (30) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(3) Filter [codegen id : 6] +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_hdemo_sk#2)) AND ((((ss_net_profit#9 >= 100.00) AND (ss_net_profit#9 <= 200.00)) OR ((ss_net_profit#9 >= 150.00) AND (ss_net_profit#9 <= 300.00))) OR ((ss_net_profit#9 >= 50.00) AND (ss_net_profit#9 <= 250.00)))) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) + +(4) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#11] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#11] +Condition : isnotnull(s_store_sk#11) + +(7) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 6] +Output [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, s_store_sk#11] + +(10) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [OH,TX]),In(ca_state, [KY,NM,OR])),In(ca_state, [MS,TX,VA]))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (TX,OH) OR ca_state#13 IN (OR,NM,KY)) OR ca_state#13 IN (VA,TX,MS))) + +(13) Project [codegen id : 2] +Output [2]: [ca_address_sk#12, ca_state#13] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(14) BroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#12] +Join type: Inner +Join condition: ((((ca_state#13 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#13 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#13 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))) + +(16) Project [codegen id : 6] +Output [7]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#12, ca_state#13] + +(17) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#15, d_year#16] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(20) Project [codegen id : 3] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#16] + +(21) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 6] +Output [6]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#15] + +(24) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(26) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : (isnotnull(cd_demo_sk#17) AND ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) OR ((cd_marital_status#18 = S) AND (cd_education_status#19 = College ))) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )))) + +(27) BroadcastExchange +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_cdemo_sk#1] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: ((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) + +(29) Project [codegen id : 6] +Output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] +Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(30) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#20, hd_dep_count#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#20, hd_dep_count#21] + +(32) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Condition : (isnotnull(hd_demo_sk#20) AND ((hd_dep_count#21 = 3) OR (hd_dep_count#21 = 1))) + +(33) BroadcastExchange +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#20] +Join type: Inner +Join condition: (((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#21 = 3)) OR (((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#21 = 1))) OR (((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#21 = 1))) + +(35) Project [codegen id : 6] +Output [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20, hd_dep_count#21] + +(36) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Keys: [] +Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] +Aggregate Attributes [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Results [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] + +(37) Exchange +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(38) HashAggregate [codegen id : 7] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Keys: [] +Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] +Aggregate Attributes [4]: [avg(ss_quantity#5)#36, avg(UnscaledValue(ss_ext_sales_price#7))#37, avg(UnscaledValue(ss_ext_wholesale_cost#8))#38, sum(UnscaledValue(ss_ext_wholesale_cost#8))#39] +Results [4]: [avg(ss_quantity#5)#36 AS avg(ss_quantity)#40, cast((avg(UnscaledValue(ss_ext_sales_price#7))#37 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#41, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#38 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#42, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#39,17,2) AS sum(ss_ext_wholesale_cost)#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7c63e82534 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_datafusion/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (7) + HashAggregate [sum,count,sum,count,sum,count,sum] [avg(ss_quantity),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] + Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] + Project [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..fcf177544f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/explain.txt @@ -0,0 +1,222 @@ +== Physical Plan == +* HashAggregate (38) ++- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.customer_address (10) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.date_dim (17) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet spark_catalog.default.customer_demographics (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet spark_catalog.default.household_demographics (30) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] + +(3) Filter [codegen id : 6] +Input [10]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_hdemo_sk#2)) AND ((((ss_net_profit#9 >= 100.00) AND (ss_net_profit#9 <= 200.00)) OR ((ss_net_profit#9 >= 150.00) AND (ss_net_profit#9 <= 300.00))) OR ((ss_net_profit#9 >= 50.00) AND (ss_net_profit#9 <= 250.00)))) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) + +(4) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#11] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#11] +Condition : isnotnull(s_store_sk#11) + +(7) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 6] +Output [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, s_store_sk#11] + +(10) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [OH,TX]),In(ca_state, [KY,NM,OR])),In(ca_state, [MS,TX,VA]))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (TX,OH) OR ca_state#13 IN (OR,NM,KY)) OR ca_state#13 IN (VA,TX,MS))) + +(13) Project [codegen id : 2] +Output [2]: [ca_address_sk#12, ca_state#13] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(14) BroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#12] +Join type: Inner +Join condition: ((((ca_state#13 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#13 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#13 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))) + +(16) Project [codegen id : 6] +Output [7]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#12, ca_state#13] + +(17) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#15, d_year#16] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(20) Project [codegen id : 3] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#16] + +(21) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 6] +Output [6]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#15] + +(24) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(26) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : (isnotnull(cd_demo_sk#17) AND ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) OR ((cd_marital_status#18 = S) AND (cd_education_status#19 = College ))) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )))) + +(27) BroadcastExchange +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_cdemo_sk#1] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: ((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) + +(29) Project [codegen id : 6] +Output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] +Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] + +(30) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#20, hd_dep_count#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#20, hd_dep_count#21] + +(32) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Condition : (isnotnull(hd_demo_sk#20) AND ((hd_dep_count#21 = 3) OR (hd_dep_count#21 = 1))) + +(33) BroadcastExchange +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#20] +Join type: Inner +Join condition: (((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#21 = 3)) OR (((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#21 = 1))) OR (((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#21 = 1))) + +(35) Project [codegen id : 6] +Output [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20, hd_dep_count#21] + +(36) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] +Keys: [] +Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] +Aggregate Attributes [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Results [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] + +(37) Exchange +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(38) HashAggregate [codegen id : 7] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Keys: [] +Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] +Aggregate Attributes [4]: [avg(ss_quantity#5)#36, avg(UnscaledValue(ss_ext_sales_price#7))#37, avg(UnscaledValue(ss_ext_wholesale_cost#8))#38, sum(UnscaledValue(ss_ext_wholesale_cost#8))#39] +Results [4]: [avg(ss_quantity#5)#36 AS avg(ss_quantity)#40, cast((avg(UnscaledValue(ss_ext_sales_price#7))#37 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#41, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#38 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#42, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#39,17,2) AS sum(ss_ext_wholesale_cost)#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7c63e82534 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.native_iceberg_compat/simplified.txt @@ -0,0 +1,57 @@ +WholeStageCodegen (7) + HashAggregate [sum,count,sum,count,sum,count,sum] [avg(ss_quantity),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] + Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] + Project [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/explain.txt new file mode 100644 index 0000000000..aee3f209cf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/explain.txt @@ -0,0 +1,772 @@ +== Physical Plan == +TakeOrderedAndProject (113) ++- * HashAggregate (112) + +- Exchange (111) + +- * HashAggregate (110) + +- * Expand (109) + +- Union (108) + :- * Project (75) + : +- * Filter (74) + : +- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (51) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : :- * HashAggregate (39) + : : : : +- Exchange (38) + : : : : +- * HashAggregate (37) + : : : : +- * Project (36) + : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : +- BroadcastExchange (31) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Project (28) + : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : +- BroadcastExchange (26) + : : : : : +- * Project (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (34) + : : : +- BroadcastExchange (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (42) + : : : : : +- * ColumnarToRow (41) + : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : +- ReusedExchange (43) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (61) + : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet spark_catalog.default.item (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (68) + : +- * Project (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.date_dim (64) + :- * Project (91) + : +- * Filter (90) + : +- * HashAggregate (89) + : +- Exchange (88) + : +- * HashAggregate (87) + : +- * Project (86) + : +- * BroadcastHashJoin Inner BuildRight (85) + : :- * Project (83) + : : +- * BroadcastHashJoin Inner BuildRight (82) + : : :- * BroadcastHashJoin LeftSemi BuildRight (80) + : : : :- * Filter (78) + : : : : +- * ColumnarToRow (77) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (76) + : : : +- ReusedExchange (79) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + +- * Project (107) + +- * Filter (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * Project (102) + +- * BroadcastHashJoin Inner BuildRight (101) + :- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- * BroadcastHashJoin LeftSemi BuildRight (96) + : : :- * Filter (94) + : : : +- * ColumnarToRow (93) + : : : +- Scan parquet spark_catalog.default.web_sales (92) + : : +- ReusedExchange (95) + : +- ReusedExchange (97) + +- ReusedExchange (100) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : isnotnull(i_item_sk#35) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#39, d_year#40, d_moy#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(66) Filter [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_year#40) AND isnotnull(d_moy#41)) AND (d_year#40 = 2001)) AND (d_moy#41 = 11)) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#42, isEmpty#43, count#44] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48, count(1)#49] +Results [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48 AS sales#50, count(1)#49 AS number_sales#51] + +(74) Filter [codegen id : 26] +Input [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) + +(75) Project [codegen id : 26] +Output [6]: [sales#50, number_sales#51, store AS channel#54, i_brand_id#36 AS i_brand_id#55, i_class_id#37 AS i_class_id#56, i_category_id#38 AS i_category_id#57] +Input [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sales#50, number_sales#51] + +(76) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#61)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(77) ColumnarToRow [codegen id : 51] +Input [4]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61] + +(78) Filter [codegen id : 51] +Input [4]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61] +Condition : isnotnull(cs_item_sk#58) + +(79) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(80) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#58] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(81) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65] + +(82) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#58] +Right keys [1]: [i_item_sk#62] +Join type: Inner +Join condition: None + +(83) Project [codegen id : 51] +Output [6]: [cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [8]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65] + +(84) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#66] + +(85) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#61] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 51] +Output [5]: [cs_quantity#59, cs_list_price#60, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [7]: [cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61, i_brand_id#63, i_class_id#64, i_category_id#65, d_date_sk#66] + +(87) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#59, cs_list_price#60, i_brand_id#63, i_class_id#64, i_category_id#65] +Keys [3]: [i_brand_id#63, i_class_id#64, i_category_id#65] +Functions [2]: [partial_sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60)), partial_count(1)] +Aggregate Attributes [3]: [sum#67, isEmpty#68, count#69] +Results [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#70, isEmpty#71, count#72] + +(88) Exchange +Input [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#70, isEmpty#71, count#72] +Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(89) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#70, isEmpty#71, count#72] +Keys [3]: [i_brand_id#63, i_class_id#64, i_category_id#65] +Functions [2]: [sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60)), count(1)] +Aggregate Attributes [2]: [sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60))#73, count(1)#74] +Results [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60))#73 AS sales#75, count(1)#74 AS number_sales#76] + +(90) Filter [codegen id : 52] +Input [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sales#75, number_sales#76] +Condition : (isnotnull(sales#75) AND (cast(sales#75 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) + +(91) Project [codegen id : 52] +Output [6]: [sales#75, number_sales#76, catalog AS channel#77, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sales#75, number_sales#76] + +(92) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#81)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(93) ColumnarToRow [codegen id : 77] +Input [4]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81] + +(94) Filter [codegen id : 77] +Input [4]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81] +Condition : isnotnull(ws_item_sk#78) + +(95) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(96) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#78] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(97) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#82, i_brand_id#83, i_class_id#84, i_category_id#85] + +(98) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#78] +Right keys [1]: [i_item_sk#82] +Join type: Inner +Join condition: None + +(99) Project [codegen id : 77] +Output [6]: [ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [8]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81, i_item_sk#82, i_brand_id#83, i_class_id#84, i_category_id#85] + +(100) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#86] + +(101) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#81] +Right keys [1]: [d_date_sk#86] +Join type: Inner +Join condition: None + +(102) Project [codegen id : 77] +Output [5]: [ws_quantity#79, ws_list_price#80, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [7]: [ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81, i_brand_id#83, i_class_id#84, i_category_id#85, d_date_sk#86] + +(103) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#79, ws_list_price#80, i_brand_id#83, i_class_id#84, i_category_id#85] +Keys [3]: [i_brand_id#83, i_class_id#84, i_category_id#85] +Functions [2]: [partial_sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80)), partial_count(1)] +Aggregate Attributes [3]: [sum#87, isEmpty#88, count#89] +Results [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#90, isEmpty#91, count#92] + +(104) Exchange +Input [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#90, isEmpty#91, count#92] +Arguments: hashpartitioning(i_brand_id#83, i_class_id#84, i_category_id#85, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(105) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#90, isEmpty#91, count#92] +Keys [3]: [i_brand_id#83, i_class_id#84, i_category_id#85] +Functions [2]: [sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80)), count(1)] +Aggregate Attributes [2]: [sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80))#93, count(1)#94] +Results [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80))#93 AS sales#95, count(1)#94 AS number_sales#96] + +(106) Filter [codegen id : 78] +Input [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sales#95, number_sales#96] +Condition : (isnotnull(sales#95) AND (cast(sales#95 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) + +(107) Project [codegen id : 78] +Output [6]: [sales#95, number_sales#96, web AS channel#97, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sales#95, number_sales#96] + +(108) Union + +(109) Expand [codegen id : 79] +Input [6]: [sales#50, number_sales#51, channel#54, i_brand_id#55, i_class_id#56, i_category_id#57] +Arguments: [[sales#50, number_sales#51, channel#54, i_brand_id#55, i_class_id#56, i_category_id#57, 0], [sales#50, number_sales#51, channel#54, i_brand_id#55, i_class_id#56, null, 1], [sales#50, number_sales#51, channel#54, i_brand_id#55, null, null, 3], [sales#50, number_sales#51, channel#54, null, null, null, 7], [sales#50, number_sales#51, null, null, null, null, 15]], [sales#50, number_sales#51, channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] + +(110) HashAggregate [codegen id : 79] +Input [7]: [sales#50, number_sales#51, channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] +Keys [5]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] +Functions [2]: [partial_sum(sales#50), partial_sum(number_sales#51)] +Aggregate Attributes [3]: [sum#103, isEmpty#104, sum#105] +Results [8]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, sum#106, isEmpty#107, sum#108] + +(111) Exchange +Input [8]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, sum#106, isEmpty#107, sum#108] +Arguments: hashpartitioning(channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(112) HashAggregate [codegen id : 80] +Input [8]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, sum#106, isEmpty#107, sum#108] +Keys [5]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#109, sum(number_sales#51)#110] +Results [6]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, sum(sales#50)#109 AS sum(sales)#111, sum(number_sales#51)#110 AS sum(number_sales)#112] + +(113) TakeOrderedAndProject +Input [6]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, sum(sales)#111, sum(number_sales)#112] +Arguments: 100, [channel#98 ASC NULLS FIRST, i_brand_id#99 ASC NULLS FIRST, i_class_id#100 ASC NULLS FIRST, i_category_id#101 ASC NULLS FIRST], [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, sum(sales)#111, sum(number_sales)#112] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#52, [id=#53] +* HashAggregate (132) ++- Exchange (131) + +- * HashAggregate (130) + +- Union (129) + :- * Project (118) + : +- * BroadcastHashJoin Inner BuildRight (117) + : :- * ColumnarToRow (115) + : : +- Scan parquet spark_catalog.default.store_sales (114) + : +- ReusedExchange (116) + :- * Project (123) + : +- * BroadcastHashJoin Inner BuildRight (122) + : :- * ColumnarToRow (120) + : : +- Scan parquet spark_catalog.default.catalog_sales (119) + : +- ReusedExchange (121) + +- * Project (128) + +- * BroadcastHashJoin Inner BuildRight (127) + :- * ColumnarToRow (125) + : +- Scan parquet spark_catalog.default.web_sales (124) + +- ReusedExchange (126) + + +(114) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#113, ss_list_price#114, ss_sold_date_sk#115] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#115)] +ReadSchema: struct + +(115) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#113, ss_list_price#114, ss_sold_date_sk#115] + +(116) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#116] + +(117) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#116] +Join type: Inner +Join condition: None + +(118) Project [codegen id : 2] +Output [2]: [ss_quantity#113 AS quantity#117, ss_list_price#114 AS list_price#118] +Input [4]: [ss_quantity#113, ss_list_price#114, ss_sold_date_sk#115, d_date_sk#116] + +(119) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#119, cs_list_price#120, cs_sold_date_sk#121] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#121)] +ReadSchema: struct + +(120) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#119, cs_list_price#120, cs_sold_date_sk#121] + +(121) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#122] + +(122) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#121] +Right keys [1]: [d_date_sk#122] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 4] +Output [2]: [cs_quantity#119 AS quantity#123, cs_list_price#120 AS list_price#124] +Input [4]: [cs_quantity#119, cs_list_price#120, cs_sold_date_sk#121, d_date_sk#122] + +(124) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#125, ws_list_price#126, ws_sold_date_sk#127] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#127)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#125, ws_list_price#126, ws_sold_date_sk#127] + +(126) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#128] + +(127) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#127] +Right keys [1]: [d_date_sk#128] +Join type: Inner +Join condition: None + +(128) Project [codegen id : 6] +Output [2]: [ws_quantity#125 AS quantity#129, ws_list_price#126 AS list_price#130] +Input [4]: [ws_quantity#125, ws_list_price#126, ws_sold_date_sk#127, d_date_sk#128] + +(129) Union + +(130) HashAggregate [codegen id : 7] +Input [2]: [quantity#117, list_price#118] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#117 as decimal(10,0)) * list_price#118))] +Aggregate Attributes [2]: [sum#131, count#132] +Results [2]: [sum#133, count#134] + +(131) Exchange +Input [2]: [sum#133, count#134] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(132) HashAggregate [codegen id : 8] +Input [2]: [sum#133, count#134] +Keys: [] +Functions [1]: [avg((cast(quantity#117 as decimal(10,0)) * list_price#118))] +Aggregate Attributes [1]: [avg((cast(quantity#117 as decimal(10,0)) * list_price#118))#135] +Results [1]: [avg((cast(quantity#117 as decimal(10,0)) * list_price#118))#135 AS average_sales#136] + +Subquery:2 Hosting operator id = 90 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] + +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9afe9e29ee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_datafusion/simplified.txt @@ -0,0 +1,203 @@ +TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum(sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + Expand [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + InputAdapter + Union + WholeStageCodegen (26) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] #12 + WholeStageCodegen (78) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..aee3f209cf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/explain.txt @@ -0,0 +1,772 @@ +== Physical Plan == +TakeOrderedAndProject (113) ++- * HashAggregate (112) + +- Exchange (111) + +- * HashAggregate (110) + +- * Expand (109) + +- Union (108) + :- * Project (75) + : +- * Filter (74) + : +- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (51) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : :- * HashAggregate (39) + : : : : +- Exchange (38) + : : : : +- * HashAggregate (37) + : : : : +- * Project (36) + : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : +- BroadcastExchange (31) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Project (28) + : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : +- BroadcastExchange (26) + : : : : : +- * Project (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (34) + : : : +- BroadcastExchange (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (42) + : : : : : +- * ColumnarToRow (41) + : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : +- ReusedExchange (43) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (61) + : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet spark_catalog.default.item (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (68) + : +- * Project (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.date_dim (64) + :- * Project (91) + : +- * Filter (90) + : +- * HashAggregate (89) + : +- Exchange (88) + : +- * HashAggregate (87) + : +- * Project (86) + : +- * BroadcastHashJoin Inner BuildRight (85) + : :- * Project (83) + : : +- * BroadcastHashJoin Inner BuildRight (82) + : : :- * BroadcastHashJoin LeftSemi BuildRight (80) + : : : :- * Filter (78) + : : : : +- * ColumnarToRow (77) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (76) + : : : +- ReusedExchange (79) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + +- * Project (107) + +- * Filter (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * Project (102) + +- * BroadcastHashJoin Inner BuildRight (101) + :- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- * BroadcastHashJoin LeftSemi BuildRight (96) + : : :- * Filter (94) + : : : +- * ColumnarToRow (93) + : : : +- Scan parquet spark_catalog.default.web_sales (92) + : : +- ReusedExchange (95) + : +- ReusedExchange (97) + +- ReusedExchange (100) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : isnotnull(i_item_sk#35) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#39, d_year#40, d_moy#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(66) Filter [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_year#40) AND isnotnull(d_moy#41)) AND (d_year#40 = 2001)) AND (d_moy#41 = 11)) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#42, isEmpty#43, count#44] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48, count(1)#49] +Results [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48 AS sales#50, count(1)#49 AS number_sales#51] + +(74) Filter [codegen id : 26] +Input [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) + +(75) Project [codegen id : 26] +Output [6]: [sales#50, number_sales#51, store AS channel#54, i_brand_id#36 AS i_brand_id#55, i_class_id#37 AS i_class_id#56, i_category_id#38 AS i_category_id#57] +Input [5]: [i_brand_id#36, i_class_id#37, i_category_id#38, sales#50, number_sales#51] + +(76) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#61)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(77) ColumnarToRow [codegen id : 51] +Input [4]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61] + +(78) Filter [codegen id : 51] +Input [4]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61] +Condition : isnotnull(cs_item_sk#58) + +(79) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(80) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#58] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(81) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65] + +(82) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#58] +Right keys [1]: [i_item_sk#62] +Join type: Inner +Join condition: None + +(83) Project [codegen id : 51] +Output [6]: [cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [8]: [cs_item_sk#58, cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65] + +(84) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#66] + +(85) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#61] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 51] +Output [5]: [cs_quantity#59, cs_list_price#60, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [7]: [cs_quantity#59, cs_list_price#60, cs_sold_date_sk#61, i_brand_id#63, i_class_id#64, i_category_id#65, d_date_sk#66] + +(87) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#59, cs_list_price#60, i_brand_id#63, i_class_id#64, i_category_id#65] +Keys [3]: [i_brand_id#63, i_class_id#64, i_category_id#65] +Functions [2]: [partial_sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60)), partial_count(1)] +Aggregate Attributes [3]: [sum#67, isEmpty#68, count#69] +Results [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#70, isEmpty#71, count#72] + +(88) Exchange +Input [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#70, isEmpty#71, count#72] +Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(89) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#70, isEmpty#71, count#72] +Keys [3]: [i_brand_id#63, i_class_id#64, i_category_id#65] +Functions [2]: [sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60)), count(1)] +Aggregate Attributes [2]: [sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60))#73, count(1)#74] +Results [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum((cast(cs_quantity#59 as decimal(10,0)) * cs_list_price#60))#73 AS sales#75, count(1)#74 AS number_sales#76] + +(90) Filter [codegen id : 52] +Input [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sales#75, number_sales#76] +Condition : (isnotnull(sales#75) AND (cast(sales#75 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) + +(91) Project [codegen id : 52] +Output [6]: [sales#75, number_sales#76, catalog AS channel#77, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sales#75, number_sales#76] + +(92) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#81)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(93) ColumnarToRow [codegen id : 77] +Input [4]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81] + +(94) Filter [codegen id : 77] +Input [4]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81] +Condition : isnotnull(ws_item_sk#78) + +(95) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(96) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#78] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(97) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#82, i_brand_id#83, i_class_id#84, i_category_id#85] + +(98) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#78] +Right keys [1]: [i_item_sk#82] +Join type: Inner +Join condition: None + +(99) Project [codegen id : 77] +Output [6]: [ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [8]: [ws_item_sk#78, ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81, i_item_sk#82, i_brand_id#83, i_class_id#84, i_category_id#85] + +(100) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#86] + +(101) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#81] +Right keys [1]: [d_date_sk#86] +Join type: Inner +Join condition: None + +(102) Project [codegen id : 77] +Output [5]: [ws_quantity#79, ws_list_price#80, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [7]: [ws_quantity#79, ws_list_price#80, ws_sold_date_sk#81, i_brand_id#83, i_class_id#84, i_category_id#85, d_date_sk#86] + +(103) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#79, ws_list_price#80, i_brand_id#83, i_class_id#84, i_category_id#85] +Keys [3]: [i_brand_id#83, i_class_id#84, i_category_id#85] +Functions [2]: [partial_sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80)), partial_count(1)] +Aggregate Attributes [3]: [sum#87, isEmpty#88, count#89] +Results [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#90, isEmpty#91, count#92] + +(104) Exchange +Input [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#90, isEmpty#91, count#92] +Arguments: hashpartitioning(i_brand_id#83, i_class_id#84, i_category_id#85, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(105) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#90, isEmpty#91, count#92] +Keys [3]: [i_brand_id#83, i_class_id#84, i_category_id#85] +Functions [2]: [sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80)), count(1)] +Aggregate Attributes [2]: [sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80))#93, count(1)#94] +Results [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum((cast(ws_quantity#79 as decimal(10,0)) * ws_list_price#80))#93 AS sales#95, count(1)#94 AS number_sales#96] + +(106) Filter [codegen id : 78] +Input [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sales#95, number_sales#96] +Condition : (isnotnull(sales#95) AND (cast(sales#95 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) + +(107) Project [codegen id : 78] +Output [6]: [sales#95, number_sales#96, web AS channel#97, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sales#95, number_sales#96] + +(108) Union + +(109) Expand [codegen id : 79] +Input [6]: [sales#50, number_sales#51, channel#54, i_brand_id#55, i_class_id#56, i_category_id#57] +Arguments: [[sales#50, number_sales#51, channel#54, i_brand_id#55, i_class_id#56, i_category_id#57, 0], [sales#50, number_sales#51, channel#54, i_brand_id#55, i_class_id#56, null, 1], [sales#50, number_sales#51, channel#54, i_brand_id#55, null, null, 3], [sales#50, number_sales#51, channel#54, null, null, null, 7], [sales#50, number_sales#51, null, null, null, null, 15]], [sales#50, number_sales#51, channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] + +(110) HashAggregate [codegen id : 79] +Input [7]: [sales#50, number_sales#51, channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] +Keys [5]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] +Functions [2]: [partial_sum(sales#50), partial_sum(number_sales#51)] +Aggregate Attributes [3]: [sum#103, isEmpty#104, sum#105] +Results [8]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, sum#106, isEmpty#107, sum#108] + +(111) Exchange +Input [8]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, sum#106, isEmpty#107, sum#108] +Arguments: hashpartitioning(channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(112) HashAggregate [codegen id : 80] +Input [8]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102, sum#106, isEmpty#107, sum#108] +Keys [5]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, spark_grouping_id#102] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#109, sum(number_sales#51)#110] +Results [6]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, sum(sales#50)#109 AS sum(sales)#111, sum(number_sales#51)#110 AS sum(number_sales)#112] + +(113) TakeOrderedAndProject +Input [6]: [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, sum(sales)#111, sum(number_sales)#112] +Arguments: 100, [channel#98 ASC NULLS FIRST, i_brand_id#99 ASC NULLS FIRST, i_class_id#100 ASC NULLS FIRST, i_category_id#101 ASC NULLS FIRST], [channel#98, i_brand_id#99, i_class_id#100, i_category_id#101, sum(sales)#111, sum(number_sales)#112] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#52, [id=#53] +* HashAggregate (132) ++- Exchange (131) + +- * HashAggregate (130) + +- Union (129) + :- * Project (118) + : +- * BroadcastHashJoin Inner BuildRight (117) + : :- * ColumnarToRow (115) + : : +- Scan parquet spark_catalog.default.store_sales (114) + : +- ReusedExchange (116) + :- * Project (123) + : +- * BroadcastHashJoin Inner BuildRight (122) + : :- * ColumnarToRow (120) + : : +- Scan parquet spark_catalog.default.catalog_sales (119) + : +- ReusedExchange (121) + +- * Project (128) + +- * BroadcastHashJoin Inner BuildRight (127) + :- * ColumnarToRow (125) + : +- Scan parquet spark_catalog.default.web_sales (124) + +- ReusedExchange (126) + + +(114) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#113, ss_list_price#114, ss_sold_date_sk#115] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#115)] +ReadSchema: struct + +(115) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#113, ss_list_price#114, ss_sold_date_sk#115] + +(116) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#116] + +(117) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#116] +Join type: Inner +Join condition: None + +(118) Project [codegen id : 2] +Output [2]: [ss_quantity#113 AS quantity#117, ss_list_price#114 AS list_price#118] +Input [4]: [ss_quantity#113, ss_list_price#114, ss_sold_date_sk#115, d_date_sk#116] + +(119) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#119, cs_list_price#120, cs_sold_date_sk#121] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#121)] +ReadSchema: struct + +(120) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#119, cs_list_price#120, cs_sold_date_sk#121] + +(121) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#122] + +(122) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#121] +Right keys [1]: [d_date_sk#122] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 4] +Output [2]: [cs_quantity#119 AS quantity#123, cs_list_price#120 AS list_price#124] +Input [4]: [cs_quantity#119, cs_list_price#120, cs_sold_date_sk#121, d_date_sk#122] + +(124) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#125, ws_list_price#126, ws_sold_date_sk#127] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#127)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#125, ws_list_price#126, ws_sold_date_sk#127] + +(126) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#128] + +(127) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#127] +Right keys [1]: [d_date_sk#128] +Join type: Inner +Join condition: None + +(128) Project [codegen id : 6] +Output [2]: [ws_quantity#125 AS quantity#129, ws_list_price#126 AS list_price#130] +Input [4]: [ws_quantity#125, ws_list_price#126, ws_sold_date_sk#127, d_date_sk#128] + +(129) Union + +(130) HashAggregate [codegen id : 7] +Input [2]: [quantity#117, list_price#118] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#117 as decimal(10,0)) * list_price#118))] +Aggregate Attributes [2]: [sum#131, count#132] +Results [2]: [sum#133, count#134] + +(131) Exchange +Input [2]: [sum#133, count#134] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(132) HashAggregate [codegen id : 8] +Input [2]: [sum#133, count#134] +Keys: [] +Functions [1]: [avg((cast(quantity#117 as decimal(10,0)) * list_price#118))] +Aggregate Attributes [1]: [avg((cast(quantity#117 as decimal(10,0)) * list_price#118))#135] +Results [1]: [avg((cast(quantity#117 as decimal(10,0)) * list_price#118))#135 AS average_sales#136] + +Subquery:2 Hosting operator id = 90 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] + +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9afe9e29ee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.native_iceberg_compat/simplified.txt @@ -0,0 +1,203 @@ +TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum(sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + Expand [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + InputAdapter + Union + WholeStageCodegen (26) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] #12 + WholeStageCodegen (78) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1),sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/explain.txt new file mode 100644 index 0000000000..bae177c32b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/explain.txt @@ -0,0 +1,724 @@ +== Physical Plan == +TakeOrderedAndProject (96) ++- * BroadcastHashJoin Inner BuildRight (95) + :- * Filter (74) + : +- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (51) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : :- * HashAggregate (39) + : : : : +- Exchange (38) + : : : : +- * HashAggregate (37) + : : : : +- * Project (36) + : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : +- BroadcastExchange (31) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Project (28) + : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : +- BroadcastExchange (26) + : : : : : +- * Project (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (34) + : : : +- BroadcastExchange (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (42) + : : : : : +- * ColumnarToRow (41) + : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : +- ReusedExchange (43) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (61) + : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet spark_catalog.default.item (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (68) + : +- * Project (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.date_dim (64) + +- BroadcastExchange (94) + +- * Filter (93) + +- * HashAggregate (92) + +- Exchange (91) + +- * HashAggregate (90) + +- * Project (89) + +- * BroadcastHashJoin Inner BuildRight (88) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin LeftSemi BuildRight (79) + : : :- * Filter (77) + : : : +- * ColumnarToRow (76) + : : : +- Scan parquet spark_catalog.default.store_sales (75) + : : +- ReusedExchange (78) + : +- ReusedExchange (80) + +- BroadcastExchange (87) + +- * Project (86) + +- * Filter (85) + +- * ColumnarToRow (84) + +- Scan parquet spark_catalog.default.date_dim (83) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : (((isnotnull(i_item_sk#35) AND isnotnull(i_brand_id#36)) AND isnotnull(i_class_id#37)) AND isnotnull(i_category_id#38)) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#39, d_week_seq#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(66) Filter [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] +Condition : ((isnotnull(d_week_seq#40) AND (d_week_seq#40 = Subquery scalar-subquery#41, [id=#42])) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#43, isEmpty#44, count#45] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49, count(1)#50] +Results [6]: [store AS channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49 AS sales#52, count(1)#50 AS number_sales#53] + +(74) Filter [codegen id : 52] +Input [6]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53] +Condition : (isnotnull(sales#52) AND (cast(sales#52 as decimal(32,6)) > cast(Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(75) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#59)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] + +(77) Filter [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Condition : isnotnull(ss_item_sk#56) + +(78) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(79) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(80) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(81) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [i_item_sk#60] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 50] +Output [6]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [8]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(83) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#64, d_week_seq#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(85) Filter [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] +Condition : ((isnotnull(d_week_seq#65) AND (d_week_seq#65 = Subquery scalar-subquery#66, [id=#67])) AND isnotnull(d_date_sk#64)) + +(86) Project [codegen id : 49] +Output [1]: [d_date_sk#64] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(87) BroadcastExchange +Input [1]: [d_date_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(88) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#59] +Right keys [1]: [d_date_sk#64] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 50] +Output [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [7]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, d_date_sk#64] + +(90) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [partial_sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), partial_count(1)] +Aggregate Attributes [3]: [sum#68, isEmpty#69, count#70] +Results [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] + +(91) Exchange +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(92) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74, count(1)#75] +Results [6]: [store AS channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74 AS sales#77, count(1)#75 AS number_sales#78] + +(93) Filter [codegen id : 51] +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Condition : (isnotnull(sales#77) AND (cast(sales#77 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(94) BroadcastExchange +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Right keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Join type: Inner +Join condition: None + +(96) TakeOrderedAndProject +Input [12]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: 100, [i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#54, [id=#55] +* HashAggregate (115) ++- Exchange (114) + +- * HashAggregate (113) + +- Union (112) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * ColumnarToRow (98) + : : +- Scan parquet spark_catalog.default.store_sales (97) + : +- ReusedExchange (99) + :- * Project (106) + : +- * BroadcastHashJoin Inner BuildRight (105) + : :- * ColumnarToRow (103) + : : +- Scan parquet spark_catalog.default.catalog_sales (102) + : +- ReusedExchange (104) + +- * Project (111) + +- * BroadcastHashJoin Inner BuildRight (110) + :- * ColumnarToRow (108) + : +- Scan parquet spark_catalog.default.web_sales (107) + +- ReusedExchange (109) + + +(97) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#81)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] + +(99) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#82] + +(100) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#81] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 2] +Output [2]: [ss_quantity#79 AS quantity#83, ss_list_price#80 AS list_price#84] +Input [4]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81, d_date_sk#82] + +(102) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#87)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] + +(104) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#88] + +(105) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#87] +Right keys [1]: [d_date_sk#88] +Join type: Inner +Join condition: None + +(106) Project [codegen id : 4] +Output [2]: [cs_quantity#85 AS quantity#89, cs_list_price#86 AS list_price#90] +Input [4]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87, d_date_sk#88] + +(107) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#93)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] + +(109) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#94] + +(110) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#93] +Right keys [1]: [d_date_sk#94] +Join type: Inner +Join condition: None + +(111) Project [codegen id : 6] +Output [2]: [ws_quantity#91 AS quantity#95, ws_list_price#92 AS list_price#96] +Input [4]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93, d_date_sk#94] + +(112) Union + +(113) HashAggregate [codegen id : 7] +Input [2]: [quantity#83, list_price#84] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [2]: [sum#97, count#98] +Results [2]: [sum#99, count#100] + +(114) Exchange +Input [2]: [sum#99, count#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(115) HashAggregate [codegen id : 8] +Input [2]: [sum#99, count#100] +Keys: [] +Functions [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101] +Results [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101 AS average_sales#102] + +Subquery:2 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#41, [id=#42] +* Project (119) ++- * Filter (118) + +- * ColumnarToRow (117) + +- Scan parquet spark_catalog.default.date_dim (116) + + +(116) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +(118) Filter [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Condition : (((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND isnotnull(d_dom#106)) AND (d_year#104 = 2000)) AND (d_moy#105 = 12)) AND (d_dom#106 = 11)) + +(119) Project [codegen id : 1] +Output [1]: [d_week_seq#103] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +Subquery:3 Hosting operator id = 93 Hosting Expression = ReusedSubquery Subquery scalar-subquery#54, [id=#55] + +Subquery:4 Hosting operator id = 85 Hosting Expression = Subquery scalar-subquery#66, [id=#67] +* Project (123) ++- * Filter (122) + +- * ColumnarToRow (121) + +- Scan parquet spark_catalog.default.date_dim (120) + + +(120) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(121) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + +(122) Filter [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Condition : (((((isnotnull(d_year#108) AND isnotnull(d_moy#109)) AND isnotnull(d_dom#110)) AND (d_year#108 = 1999)) AND (d_moy#109 = 12)) AND (d_dom#110 = 11)) + +(123) Project [codegen id : 1] +Output [1]: [d_week_seq#107] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b9a845cb11 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_datafusion/simplified.txt @@ -0,0 +1,191 @@ +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (51) + Filter [sales] + ReusedSubquery [average_sales] #2 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (49) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..bae177c32b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/explain.txt @@ -0,0 +1,724 @@ +== Physical Plan == +TakeOrderedAndProject (96) ++- * BroadcastHashJoin Inner BuildRight (95) + :- * Filter (74) + : +- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (51) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : :- * HashAggregate (39) + : : : : +- Exchange (38) + : : : : +- * HashAggregate (37) + : : : : +- * Project (36) + : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : +- BroadcastExchange (31) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Project (28) + : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : +- BroadcastExchange (26) + : : : : : +- * Project (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (34) + : : : +- BroadcastExchange (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (42) + : : : : : +- * ColumnarToRow (41) + : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : +- ReusedExchange (43) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (61) + : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet spark_catalog.default.item (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (68) + : +- * Project (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.date_dim (64) + +- BroadcastExchange (94) + +- * Filter (93) + +- * HashAggregate (92) + +- Exchange (91) + +- * HashAggregate (90) + +- * Project (89) + +- * BroadcastHashJoin Inner BuildRight (88) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin LeftSemi BuildRight (79) + : : :- * Filter (77) + : : : +- * ColumnarToRow (76) + : : : +- Scan parquet spark_catalog.default.store_sales (75) + : : +- ReusedExchange (78) + : +- ReusedExchange (80) + +- BroadcastExchange (87) + +- * Project (86) + +- * Filter (85) + +- * ColumnarToRow (84) + +- Scan parquet spark_catalog.default.date_dim (83) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : (((isnotnull(i_item_sk#35) AND isnotnull(i_brand_id#36)) AND isnotnull(i_class_id#37)) AND isnotnull(i_category_id#38)) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#39, d_week_seq#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(66) Filter [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] +Condition : ((isnotnull(d_week_seq#40) AND (d_week_seq#40 = Subquery scalar-subquery#41, [id=#42])) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#43, isEmpty#44, count#45] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49, count(1)#50] +Results [6]: [store AS channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49 AS sales#52, count(1)#50 AS number_sales#53] + +(74) Filter [codegen id : 52] +Input [6]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53] +Condition : (isnotnull(sales#52) AND (cast(sales#52 as decimal(32,6)) > cast(Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(75) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#59)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] + +(77) Filter [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Condition : isnotnull(ss_item_sk#56) + +(78) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(79) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(80) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(81) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [i_item_sk#60] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 50] +Output [6]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [8]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(83) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#64, d_week_seq#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(85) Filter [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] +Condition : ((isnotnull(d_week_seq#65) AND (d_week_seq#65 = Subquery scalar-subquery#66, [id=#67])) AND isnotnull(d_date_sk#64)) + +(86) Project [codegen id : 49] +Output [1]: [d_date_sk#64] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(87) BroadcastExchange +Input [1]: [d_date_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(88) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#59] +Right keys [1]: [d_date_sk#64] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 50] +Output [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [7]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, d_date_sk#64] + +(90) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [partial_sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), partial_count(1)] +Aggregate Attributes [3]: [sum#68, isEmpty#69, count#70] +Results [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] + +(91) Exchange +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(92) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74, count(1)#75] +Results [6]: [store AS channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74 AS sales#77, count(1)#75 AS number_sales#78] + +(93) Filter [codegen id : 51] +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Condition : (isnotnull(sales#77) AND (cast(sales#77 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(94) BroadcastExchange +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Right keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Join type: Inner +Join condition: None + +(96) TakeOrderedAndProject +Input [12]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: 100, [i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#54, [id=#55] +* HashAggregate (115) ++- Exchange (114) + +- * HashAggregate (113) + +- Union (112) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * ColumnarToRow (98) + : : +- Scan parquet spark_catalog.default.store_sales (97) + : +- ReusedExchange (99) + :- * Project (106) + : +- * BroadcastHashJoin Inner BuildRight (105) + : :- * ColumnarToRow (103) + : : +- Scan parquet spark_catalog.default.catalog_sales (102) + : +- ReusedExchange (104) + +- * Project (111) + +- * BroadcastHashJoin Inner BuildRight (110) + :- * ColumnarToRow (108) + : +- Scan parquet spark_catalog.default.web_sales (107) + +- ReusedExchange (109) + + +(97) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#81)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] + +(99) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#82] + +(100) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#81] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 2] +Output [2]: [ss_quantity#79 AS quantity#83, ss_list_price#80 AS list_price#84] +Input [4]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81, d_date_sk#82] + +(102) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#87)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] + +(104) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#88] + +(105) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#87] +Right keys [1]: [d_date_sk#88] +Join type: Inner +Join condition: None + +(106) Project [codegen id : 4] +Output [2]: [cs_quantity#85 AS quantity#89, cs_list_price#86 AS list_price#90] +Input [4]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87, d_date_sk#88] + +(107) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#93)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] + +(109) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#94] + +(110) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#93] +Right keys [1]: [d_date_sk#94] +Join type: Inner +Join condition: None + +(111) Project [codegen id : 6] +Output [2]: [ws_quantity#91 AS quantity#95, ws_list_price#92 AS list_price#96] +Input [4]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93, d_date_sk#94] + +(112) Union + +(113) HashAggregate [codegen id : 7] +Input [2]: [quantity#83, list_price#84] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [2]: [sum#97, count#98] +Results [2]: [sum#99, count#100] + +(114) Exchange +Input [2]: [sum#99, count#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(115) HashAggregate [codegen id : 8] +Input [2]: [sum#99, count#100] +Keys: [] +Functions [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101] +Results [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101 AS average_sales#102] + +Subquery:2 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#41, [id=#42] +* Project (119) ++- * Filter (118) + +- * ColumnarToRow (117) + +- Scan parquet spark_catalog.default.date_dim (116) + + +(116) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +(118) Filter [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Condition : (((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND isnotnull(d_dom#106)) AND (d_year#104 = 2000)) AND (d_moy#105 = 12)) AND (d_dom#106 = 11)) + +(119) Project [codegen id : 1] +Output [1]: [d_week_seq#103] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +Subquery:3 Hosting operator id = 93 Hosting Expression = ReusedSubquery Subquery scalar-subquery#54, [id=#55] + +Subquery:4 Hosting operator id = 85 Hosting Expression = Subquery scalar-subquery#66, [id=#67] +* Project (123) ++- * Filter (122) + +- * ColumnarToRow (121) + +- Scan parquet spark_catalog.default.date_dim (120) + + +(120) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(121) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + +(122) Filter [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Condition : (((((isnotnull(d_year#108) AND isnotnull(d_moy#109)) AND isnotnull(d_dom#110)) AND (d_year#108 = 1999)) AND (d_moy#109 = 12)) AND (d_dom#110 = 11)) + +(123) Project [codegen id : 1] +Output [1]: [d_week_seq#107] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..b9a845cb11 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.native_iceberg_compat/simplified.txt @@ -0,0 +1,191 @@ +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (51) + Filter [sales] + ReusedSubquery [average_sales] #2 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (49) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/explain.txt new file mode 100644 index 0000000000..397cbe07ce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/explain.txt @@ -0,0 +1,154 @@ +== Physical Plan == +TakeOrderedAndProject (26) ++- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.customer (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.customer_address (10) + +- BroadcastExchange (20) + +- * Project (19) + +- * Filter (18) + +- * ColumnarToRow (17) + +- Scan parquet spark_catalog.default.date_dim (16) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_bill_customer_sk#1) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] +Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, c_customer_sk#4, c_current_addr_sk#5] + +(10) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Condition : isnotnull(ca_address_sk#6) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#6] +Join type: Inner +Join condition: ((substr(ca_zip#8, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#7 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)) + +(15) Project [codegen id : 4] +Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] +Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5, ca_address_sk#6, ca_state#7, ca_zip#8] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_qoy#11) AND isnotnull(d_year#10)) AND (d_qoy#11 = 2)) AND (d_year#10 = 2001)) AND isnotnull(d_date_sk#9)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [cs_sales_price#2, ca_zip#8] +Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8, d_date_sk#9] + +(23) HashAggregate [codegen id : 4] +Input [2]: [cs_sales_price#2, ca_zip#8] +Keys [1]: [ca_zip#8] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [2]: [ca_zip#8, sum#13] + +(24) Exchange +Input [2]: [ca_zip#8, sum#13] +Arguments: hashpartitioning(ca_zip#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#8, sum#13] +Keys [1]: [ca_zip#8] +Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#14] +Results [2]: [ca_zip#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#14,17,2) AS sum(cs_sales_price)#15] + +(26) TakeOrderedAndProject +Input [2]: [ca_zip#8, sum(cs_sales_price)#15] +Arguments: 100, [ca_zip#8 ASC NULLS FIRST], [ca_zip#8, sum(cs_sales_price)#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1509f18b9d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_datafusion/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen (5) + HashAggregate [ca_zip,sum] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen (4) + HashAggregate [ca_zip,cs_sales_price] [sum,sum] + Project [cs_sales_price,ca_zip] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,cs_sold_date_sk,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_zip,ca_state,cs_sales_price] + Project [cs_sales_price,cs_sold_date_sk,c_current_addr_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..397cbe07ce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/explain.txt @@ -0,0 +1,154 @@ +== Physical Plan == +TakeOrderedAndProject (26) ++- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.customer (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.customer_address (10) + +- BroadcastExchange (20) + +- * Project (19) + +- * Filter (18) + +- * ColumnarToRow (17) + +- Scan parquet spark_catalog.default.date_dim (16) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_bill_customer_sk#1) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5] +Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, c_customer_sk#4, c_current_addr_sk#5] + +(10) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Condition : isnotnull(ca_address_sk#6) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#6, ca_state#7, ca_zip#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#6] +Join type: Inner +Join condition: ((substr(ca_zip#8, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#7 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)) + +(15) Project [codegen id : 4] +Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8] +Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#5, ca_address_sk#6, ca_state#7, ca_zip#8] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] +Condition : ((((isnotnull(d_qoy#11) AND isnotnull(d_year#10)) AND (d_qoy#11 = 2)) AND (d_year#10 = 2001)) AND isnotnull(d_date_sk#9)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_qoy#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [cs_sales_price#2, ca_zip#8] +Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#8, d_date_sk#9] + +(23) HashAggregate [codegen id : 4] +Input [2]: [cs_sales_price#2, ca_zip#8] +Keys [1]: [ca_zip#8] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [2]: [ca_zip#8, sum#13] + +(24) Exchange +Input [2]: [ca_zip#8, sum#13] +Arguments: hashpartitioning(ca_zip#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#8, sum#13] +Keys [1]: [ca_zip#8] +Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#14] +Results [2]: [ca_zip#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#14,17,2) AS sum(cs_sales_price)#15] + +(26) TakeOrderedAndProject +Input [2]: [ca_zip#8, sum(cs_sales_price)#15] +Arguments: 100, [ca_zip#8 ASC NULLS FIRST], [ca_zip#8, sum(cs_sales_price)#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1509f18b9d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.native_iceberg_compat/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen (5) + HashAggregate [ca_zip,sum] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen (4) + HashAggregate [ca_zip,cs_sales_price] [sum,sum] + Project [cs_sales_price,ca_zip] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,cs_sold_date_sk,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_zip,ca_state,cs_sales_price] + Project [cs_sales_price,cs_sold_date_sk,c_current_addr_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/explain.txt new file mode 100644 index 0000000000..70a5c5bed4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +* HashAggregate (45) ++- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * SortMergeJoin LeftAnti (19) + : : : :- * Project (13) + : : : : +- * SortMergeJoin LeftSemi (12) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (7) + : : : +- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet spark_catalog.default.catalog_returns (14) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet spark_catalog.default.date_dim (20) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet spark_catalog.default.customer_address (27) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.call_center (34) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 1] +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(4) Project [codegen id : 1] +Output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] + +(5) Exchange +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_order_number#5 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] + +(9) Project [codegen id : 3] +Output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] + +(10) Exchange +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#10 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 5] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cs_order_number#10] +Join type: LeftSemi +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) + +(13) Project [codegen id : 5] +Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(14) Scan parquet spark_catalog.default.catalog_returns +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 6] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] + +(16) Project [codegen id : 6] +Output [1]: [cr_order_number#12] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] + +(17) Exchange +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 7] +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin [codegen id : 11] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cr_order_number#12] +Join type: LeftAnti +Join condition: None + +(20) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#14, d_date#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] + +(22) Filter [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2002-02-01)) AND (d_date#15 <= 2002-04-02)) AND isnotnull(d_date_sk#14)) + +(23) Project [codegen id : 8] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#15] + +(24) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 11] +Output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#14] + +(27) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] + +(29) Filter [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(30) Project [codegen id : 9] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(31) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 11] +Output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] + +(34) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#18, cc_county#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [cc_call_center_sk#18, cc_county#19] + +(36) Filter [codegen id : 10] +Input [2]: [cc_call_center_sk#18, cc_county#19] +Condition : ((isnotnull(cc_county#19) AND (cc_county#19 = Williamson County)) AND isnotnull(cc_call_center_sk#18)) + +(37) Project [codegen id : 10] +Output [1]: [cc_call_center_sk#18] +Input [2]: [cc_call_center_sk#18, cc_county#19] + +(38) BroadcastExchange +Input [1]: [cc_call_center_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#18] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 11] +Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#18] + +(41) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] + +(42) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, sum#22, sum#23] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, sum#22, sum#23] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] + +(44) Exchange +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 12] +Input [3]: [sum#22, sum#23, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#21,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0ae1584bd9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_datafusion/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen (12) + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (11) + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + HashAggregate [cs_order_number,cs_ext_ship_cost,cs_net_profit] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] + Project [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + SortMergeJoin [cs_order_number,cr_order_number] + InputAdapter + WholeStageCodegen (5) + Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + SortMergeJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] + InputAdapter + WholeStageCodegen (2) + Sort [cs_order_number] + InputAdapter + Exchange [cs_order_number] #2 + WholeStageCodegen (1) + Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_order_number] + InputAdapter + Exchange [cs_order_number] #3 + WholeStageCodegen (3) + Project [cs_warehouse_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_warehouse_sk,cs_order_number,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [cr_order_number] + InputAdapter + Exchange [cr_order_number] #4 + WholeStageCodegen (6) + Project [cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_order_number,cr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Project [cc_call_center_sk] + Filter [cc_county,cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_county] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..70a5c5bed4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +* HashAggregate (45) ++- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * SortMergeJoin LeftAnti (19) + : : : :- * Project (13) + : : : : +- * SortMergeJoin LeftSemi (12) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (7) + : : : +- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet spark_catalog.default.catalog_returns (14) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet spark_catalog.default.date_dim (20) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet spark_catalog.default.customer_address (27) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.call_center (34) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 1] +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(4) Project [codegen id : 1] +Output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cs_sold_date_sk#8] + +(5) Exchange +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Arguments: [cs_order_number#5 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] + +(9) Project [codegen id : 3] +Output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] + +(10) Exchange +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#10 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 5] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cs_order_number#10] +Join type: LeftSemi +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) + +(13) Project [codegen id : 5] +Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(14) Scan parquet spark_catalog.default.catalog_returns +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 6] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] + +(16) Project [codegen id : 6] +Output [1]: [cr_order_number#12] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] + +(17) Exchange +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 7] +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin [codegen id : 11] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cr_order_number#12] +Join type: LeftAnti +Join condition: None + +(20) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#14, d_date#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] + +(22) Filter [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2002-02-01)) AND (d_date#15 <= 2002-04-02)) AND isnotnull(d_date_sk#14)) + +(23) Project [codegen id : 8] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#15] + +(24) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 11] +Output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#14] + +(27) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] + +(29) Filter [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(30) Project [codegen id : 9] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(31) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 11] +Output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] + +(34) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#18, cc_county#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [cc_call_center_sk#18, cc_county#19] + +(36) Filter [codegen id : 10] +Input [2]: [cc_call_center_sk#18, cc_county#19] +Condition : ((isnotnull(cc_county#19) AND (cc_county#19 = Williamson County)) AND isnotnull(cc_call_center_sk#18)) + +(37) Project [codegen id : 10] +Output [1]: [cc_call_center_sk#18] +Input [2]: [cc_call_center_sk#18, cc_county#19] + +(38) BroadcastExchange +Input [1]: [cc_call_center_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#18] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 11] +Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#18] + +(41) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] + +(42) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, sum#22, sum#23] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_order_number#5, sum#22, sum#23] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] + +(44) Exchange +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 12] +Input [3]: [sum#22, sum#23, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#21,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..0ae1584bd9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.native_iceberg_compat/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen (12) + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (11) + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + HashAggregate [cs_order_number,cs_ext_ship_cost,cs_net_profit] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] + Project [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + SortMergeJoin [cs_order_number,cr_order_number] + InputAdapter + WholeStageCodegen (5) + Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + SortMergeJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] + InputAdapter + WholeStageCodegen (2) + Sort [cs_order_number] + InputAdapter + Exchange [cs_order_number] #2 + WholeStageCodegen (1) + Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_order_number] + InputAdapter + Exchange [cs_order_number] #3 + WholeStageCodegen (3) + Project [cs_warehouse_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_warehouse_sk,cs_order_number,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [cr_order_number] + InputAdapter + Exchange [cr_order_number] #4 + WholeStageCodegen (6) + Project [cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_order_number,cr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Project [cc_call_center_sk] + Filter [cc_county,cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_county] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/explain.txt new file mode 100644 index 0000000000..f5b068a455 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/explain.txt @@ -0,0 +1,279 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet spark_catalog.default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet spark_catalog.default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet spark_catalog.default.item (39) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] +Right keys [3]: [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(10) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(12) Filter [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#8, sr_item_sk#7] +Right keys [2]: [cs_bill_customer_sk#12, cs_item_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(16) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#16, d_quarter_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#16, d_quarter_name#17] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#16, d_quarter_name#17] +Condition : ((isnotnull(d_quarter_name#17) AND (d_quarter_name#17 = 2001Q1)) AND isnotnull(d_date_sk#16)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#16] +Input [2]: [d_date_sk#16, d_quarter_name#17] + +(20) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] + +(23) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#18, d_quarter_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : (d_quarter_name#19 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#18)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(27) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#11] +Right keys [1]: [d_date_sk#18] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#18] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#20] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#20] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#20] + +(33) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#21, s_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#21, s_state#22] + +(35) Filter [codegen id : 6] +Input [2]: [s_store_sk#21, s_state#22] +Condition : isnotnull(s_store_sk#21) + +(36) BroadcastExchange +Input [2]: [s_store_sk#21, s_state#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#21] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#21, s_state#22] + +(39) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Condition : isnotnull(i_item_sk#23) + +(42) BroadcastExchange +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#23] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 8] +Output [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_sk#23, i_item_id#24, i_item_desc#25] + +(45) HashAggregate [codegen id : 8] +Input [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#10), partial_avg(sr_return_quantity#10), partial_stddev_samp(cast(sr_return_quantity#10 as double)), partial_count(cs_quantity#14), partial_avg(cs_quantity#14), partial_stddev_samp(cast(cs_quantity#14 as double))] +Aggregate Attributes [18]: [count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Results [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] + +(46) Exchange +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Arguments: hashpartitioning(i_item_id#24, i_item_desc#25, s_state#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(47) HashAggregate [codegen id : 9] +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#10), avg(sr_return_quantity#10), stddev_samp(cast(sr_return_quantity#10 as double)), count(cs_quantity#14), avg(cs_quantity#14), stddev_samp(cast(cs_quantity#14 as double))] +Aggregate Attributes [9]: [count(ss_quantity#5)#62, avg(ss_quantity#5)#63, stddev_samp(cast(ss_quantity#5 as double))#64, count(sr_return_quantity#10)#65, avg(sr_return_quantity#10)#66, stddev_samp(cast(sr_return_quantity#10 as double))#67, count(cs_quantity#14)#68, avg(cs_quantity#14)#69, stddev_samp(cast(cs_quantity#14 as double))#70] +Results [15]: [i_item_id#24, i_item_desc#25, s_state#22, count(ss_quantity#5)#62 AS store_sales_quantitycount#71, avg(ss_quantity#5)#63 AS store_sales_quantityave#72, stddev_samp(cast(ss_quantity#5 as double))#64 AS store_sales_quantitystdev#73, (stddev_samp(cast(ss_quantity#5 as double))#64 / avg(ss_quantity#5)#63) AS store_sales_quantitycov#74, count(sr_return_quantity#10)#65 AS as_store_returns_quantitycount#75, avg(sr_return_quantity#10)#66 AS as_store_returns_quantityave#76, stddev_samp(cast(sr_return_quantity#10 as double))#67 AS as_store_returns_quantitystdev#77, (stddev_samp(cast(sr_return_quantity#10 as double))#67 / avg(sr_return_quantity#10)#66) AS store_returns_quantitycov#78, count(cs_quantity#14)#68 AS catalog_sales_quantitycount#79, avg(cs_quantity#14)#69 AS catalog_sales_quantityave#80, (stddev_samp(cast(cs_quantity#14 as double))#70 / avg(cs_quantity#14)#69) AS catalog_sales_quantitystdev#81, (stddev_samp(cast(cs_quantity#14 as double))#70 / avg(cs_quantity#14)#69) AS catalog_sales_quantitycov#82] + +(48) TakeOrderedAndProject +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] +Arguments: 100, [i_item_id#24 ASC NULLS FIRST, i_item_desc#25 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST], [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a951b39350 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] [count(ss_quantity),avg(ss_quantity),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(sr_return_quantity),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cs_quantity),stddev_samp(cast(cs_quantity as double)),store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + InputAdapter + Exchange [i_item_id,i_item_desc,s_state] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f5b068a455 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/explain.txt @@ -0,0 +1,279 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet spark_catalog.default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet spark_catalog.default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet spark_catalog.default.item (39) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] +Right keys [3]: [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(10) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(12) Filter [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#8, sr_item_sk#7] +Right keys [2]: [cs_bill_customer_sk#12, cs_item_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(16) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#16, d_quarter_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#16, d_quarter_name#17] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#16, d_quarter_name#17] +Condition : ((isnotnull(d_quarter_name#17) AND (d_quarter_name#17 = 2001Q1)) AND isnotnull(d_date_sk#16)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#16] +Input [2]: [d_date_sk#16, d_quarter_name#17] + +(20) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] + +(23) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#18, d_quarter_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : (d_quarter_name#19 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#18)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(27) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#11] +Right keys [1]: [d_date_sk#18] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#18] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#20] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#20] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#20] + +(33) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#21, s_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#21, s_state#22] + +(35) Filter [codegen id : 6] +Input [2]: [s_store_sk#21, s_state#22] +Condition : isnotnull(s_store_sk#21) + +(36) BroadcastExchange +Input [2]: [s_store_sk#21, s_state#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#21] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#21, s_state#22] + +(39) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Condition : isnotnull(i_item_sk#23) + +(42) BroadcastExchange +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#23] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 8] +Output [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_sk#23, i_item_id#24, i_item_desc#25] + +(45) HashAggregate [codegen id : 8] +Input [6]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_state#22, i_item_id#24, i_item_desc#25] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#10), partial_avg(sr_return_quantity#10), partial_stddev_samp(cast(sr_return_quantity#10 as double)), partial_count(cs_quantity#14), partial_avg(cs_quantity#14), partial_stddev_samp(cast(cs_quantity#14 as double))] +Aggregate Attributes [18]: [count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Results [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] + +(46) Exchange +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Arguments: hashpartitioning(i_item_id#24, i_item_desc#25, s_state#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(47) HashAggregate [codegen id : 9] +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#10), avg(sr_return_quantity#10), stddev_samp(cast(sr_return_quantity#10 as double)), count(cs_quantity#14), avg(cs_quantity#14), stddev_samp(cast(cs_quantity#14 as double))] +Aggregate Attributes [9]: [count(ss_quantity#5)#62, avg(ss_quantity#5)#63, stddev_samp(cast(ss_quantity#5 as double))#64, count(sr_return_quantity#10)#65, avg(sr_return_quantity#10)#66, stddev_samp(cast(sr_return_quantity#10 as double))#67, count(cs_quantity#14)#68, avg(cs_quantity#14)#69, stddev_samp(cast(cs_quantity#14 as double))#70] +Results [15]: [i_item_id#24, i_item_desc#25, s_state#22, count(ss_quantity#5)#62 AS store_sales_quantitycount#71, avg(ss_quantity#5)#63 AS store_sales_quantityave#72, stddev_samp(cast(ss_quantity#5 as double))#64 AS store_sales_quantitystdev#73, (stddev_samp(cast(ss_quantity#5 as double))#64 / avg(ss_quantity#5)#63) AS store_sales_quantitycov#74, count(sr_return_quantity#10)#65 AS as_store_returns_quantitycount#75, avg(sr_return_quantity#10)#66 AS as_store_returns_quantityave#76, stddev_samp(cast(sr_return_quantity#10 as double))#67 AS as_store_returns_quantitystdev#77, (stddev_samp(cast(sr_return_quantity#10 as double))#67 / avg(sr_return_quantity#10)#66) AS store_returns_quantitycov#78, count(cs_quantity#14)#68 AS catalog_sales_quantitycount#79, avg(cs_quantity#14)#69 AS catalog_sales_quantityave#80, (stddev_samp(cast(cs_quantity#14 as double))#70 / avg(cs_quantity#14)#69) AS catalog_sales_quantitystdev#81, (stddev_samp(cast(cs_quantity#14 as double))#70 / avg(cs_quantity#14)#69) AS catalog_sales_quantitycov#82] + +(48) TakeOrderedAndProject +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] +Arguments: 100, [i_item_id#24 ASC NULLS FIRST, i_item_desc#25 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST], [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a951b39350 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] [count(ss_quantity),avg(ss_quantity),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(sr_return_quantity),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cs_quantity),stddev_samp(cast(cs_quantity as double)),store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + InputAdapter + Exchange [i_item_id,i_item_desc,s_state] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/explain.txt new file mode 100644 index 0000000000..6e2c40bca4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/explain.txt @@ -0,0 +1,271 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Expand (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet spark_catalog.default.customer (11) + : : : +- BroadcastExchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet spark_catalog.default.customer_demographics (18) + : : +- BroadcastExchange (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet spark_catalog.default.customer_address (24) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet spark_catalog.default.date_dim (30) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.item (37) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown )) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,12,2,6,8,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(18) Scan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(21) BroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(24) Scan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [IN,MS,ND,NM,OK,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#20)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(30) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#24, d_year#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1998)) AND isnotnull(d_date_sk#24)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#25] + +(34) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] + +(37) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#26, i_item_id#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(40) BroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] + +(43) Expand [codegen id : 7] +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] + +(44) HashAggregate [codegen id : 7] +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Results [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] + +(45) Exchange +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(46) HashAggregate [codegen id : 8] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#61, avg(cast(cs_list_price#5 as decimal(12,2)))#62, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63, avg(cast(cs_sales_price#6 as decimal(12,2)))#64, avg(cast(cs_net_profit#8 as decimal(12,2)))#65, avg(cast(c_birth_year#18 as decimal(12,2)))#66, avg(cast(cd_dep_count#13 as decimal(12,2)))#67] +Results [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, avg(cast(cs_quantity#4 as decimal(12,2)))#61 AS agg1#68, avg(cast(cs_list_price#5 as decimal(12,2)))#62 AS agg2#69, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63 AS agg3#70, avg(cast(cs_sales_price#6 as decimal(12,2)))#64 AS agg4#71, avg(cast(cs_net_profit#8 as decimal(12,2)))#65 AS agg5#72, avg(cast(c_birth_year#18 as decimal(12,2)))#66 AS agg6#73, avg(cast(cd_dep_count#13 as decimal(12,2)))#67 AS agg7#74] + +(47) TakeOrderedAndProject +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] +Arguments: 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bd9fc33e49 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_datafusion/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + WholeStageCodegen (8) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Expand [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_gender,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6e2c40bca4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/explain.txt @@ -0,0 +1,271 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Expand (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet spark_catalog.default.customer (11) + : : : +- BroadcastExchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet spark_catalog.default.customer_demographics (18) + : : +- BroadcastExchange (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet spark_catalog.default.customer_address (24) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet spark_catalog.default.date_dim (30) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.item (37) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown )) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,12,2,6,8,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(18) Scan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(21) BroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(24) Scan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [IN,MS,ND,NM,OK,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#20)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(30) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#24, d_year#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1998)) AND isnotnull(d_date_sk#24)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#25] + +(34) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] + +(37) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#26, i_item_id#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(40) BroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] + +(43) Expand [codegen id : 7] +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, ca_state#22, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, ca_country#23, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#27, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] + +(44) HashAggregate [codegen id : 7] +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Results [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] + +(45) Exchange +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(46) HashAggregate [codegen id : 8] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#61, avg(cast(cs_list_price#5 as decimal(12,2)))#62, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63, avg(cast(cs_sales_price#6 as decimal(12,2)))#64, avg(cast(cs_net_profit#8 as decimal(12,2)))#65, avg(cast(c_birth_year#18 as decimal(12,2)))#66, avg(cast(cd_dep_count#13 as decimal(12,2)))#67] +Results [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, avg(cast(cs_quantity#4 as decimal(12,2)))#61 AS agg1#68, avg(cast(cs_list_price#5 as decimal(12,2)))#62 AS agg2#69, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63 AS agg3#70, avg(cast(cs_sales_price#6 as decimal(12,2)))#64 AS agg4#71, avg(cast(cs_net_profit#8 as decimal(12,2)))#65 AS agg5#72, avg(cast(c_birth_year#18 as decimal(12,2)))#66 AS agg6#73, avg(cast(cd_dep_count#13 as decimal(12,2)))#67 AS agg7#74] + +(47) TakeOrderedAndProject +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] +Arguments: 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bd9fc33e49 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.native_iceberg_compat/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + WholeStageCodegen (8) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Expand [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_gender,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/explain.txt new file mode 100644 index 0000000000..2ee7d276e1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/explain.txt @@ -0,0 +1,227 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.date_dim (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet spark_catalog.default.store_sales (5) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.item (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.customer (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet spark_catalog.default.customer_address (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet spark_catalog.default.store (30) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 6] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(7) Filter [codegen id : 1] +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnotnull(ss_store_sk#6)) + +(8) BroadcastExchange +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[4, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#8] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 6] +Output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(11) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] + +(13) Filter [codegen id : 2] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 8)) AND isnotnull(i_item_sk#9)) + +(14) Project [codegen id : 2] +Output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] + +(15) BroadcastExchange +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 6] +Output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(18) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] + +(20) Filter [codegen id : 3] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) + +(21) BroadcastExchange +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#5] +Right keys [1]: [c_customer_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 6] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] + +(24) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_zip#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_zip#18] + +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) + +(27) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] + +(30) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#19, s_zip#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [s_store_sk#19, s_zip#20] + +(32) Filter [codegen id : 5] +Input [2]: [s_store_sk#19, s_zip#20] +Condition : (isnotnull(s_zip#20) AND isnotnull(s_store_sk#19)) + +(33) BroadcastExchange +Input [2]: [s_store_sk#19, s_zip#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#19] +Join type: Inner +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)) + +(35) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] + +(36) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#21] +Results [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] + +(37) Exchange +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(38) HashAggregate [codegen id : 7] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#23] +Results [5]: [i_brand_id#10 AS brand_id#24, i_brand#11 AS brand#25, i_manufact_id#12, i_manufact#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#23,17,2) AS ext_price#26] + +(39) TakeOrderedAndProject +Input [5]: [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] +Arguments: 100, [ext_price#26 DESC NULLS LAST, brand#25 ASC NULLS FIRST, brand_id#24 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/simplified.txt new file mode 100644 index 0000000000..65d7e34ffe --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_datafusion/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] + WholeStageCodegen (7) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + WholeStageCodegen (6) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] + Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [s_zip,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_zip] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2ee7d276e1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/explain.txt @@ -0,0 +1,227 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.date_dim (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet spark_catalog.default.store_sales (5) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.item (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.customer (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet spark_catalog.default.customer_address (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet spark_catalog.default.store (30) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 6] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(7) Filter [codegen id : 1] +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnotnull(ss_store_sk#6)) + +(8) BroadcastExchange +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[4, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#8] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 6] +Output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] + +(11) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] + +(13) Filter [codegen id : 2] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 8)) AND isnotnull(i_item_sk#9)) + +(14) Project [codegen id : 2] +Output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] + +(15) BroadcastExchange +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 6] +Output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] + +(18) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] + +(20) Filter [codegen id : 3] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) + +(21) BroadcastExchange +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#5] +Right keys [1]: [c_customer_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 6] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] + +(24) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_zip#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_zip#18] + +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) + +(27) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] + +(30) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#19, s_zip#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [s_store_sk#19, s_zip#20] + +(32) Filter [codegen id : 5] +Input [2]: [s_store_sk#19, s_zip#20] +Condition : (isnotnull(s_zip#20) AND isnotnull(s_store_sk#19)) + +(33) BroadcastExchange +Input [2]: [s_store_sk#19, s_zip#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#19] +Join type: Inner +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)) + +(35) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] + +(36) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#21] +Results [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] + +(37) Exchange +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(38) HashAggregate [codegen id : 7] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#23] +Results [5]: [i_brand_id#10 AS brand_id#24, i_brand#11 AS brand#25, i_manufact_id#12, i_manufact#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#23,17,2) AS ext_price#26] + +(39) TakeOrderedAndProject +Input [5]: [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] +Arguments: 100, [ext_price#26 DESC NULLS LAST, brand#25 ASC NULLS FIRST, brand_id#24 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..65d7e34ffe --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.native_iceberg_compat/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] + WholeStageCodegen (7) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + WholeStageCodegen (6) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] + Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [s_zip,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_zip] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/explain.txt new file mode 100644 index 0000000000..72bee887d4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/explain.txt @@ -0,0 +1,212 @@ +== Physical Plan == +* Sort (37) ++- Exchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- Union (7) + : : : :- * Project (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- * Project (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.catalog_sales (4) + : : +- BroadcastExchange (11) + : : +- * Filter (10) + : : +- * ColumnarToRow (9) + : : +- Scan parquet spark_catalog.default.date_dim (8) + : +- BroadcastExchange (21) + : +- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.date_dim (17) + +- BroadcastExchange (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * HashAggregate (25) + : +- ReusedExchange (24) + +- BroadcastExchange (30) + +- * Project (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet spark_catalog.default.date_dim (26) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#2)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] + +(3) Project [codegen id : 1] +Output [2]: [ws_sold_date_sk#2 AS sold_date_sk#3, ws_ext_sales_price#1 AS sales_price#4] +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] + +(4) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#6)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] + +(6) Project [codegen id : 2] +Output [2]: [cs_sold_date_sk#6 AS sold_date_sk#7, cs_ext_sales_price#5 AS sales_price#8] +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] + +(7) Union + +(8) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(9) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(10) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(11) BroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(12) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 4] +Output [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] + +(14) HashAggregate [codegen id : 4] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] + +(15) Exchange +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(16) HashAggregate [codegen id : 12] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] + +(17) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_week_seq#40, d_year#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [2]: [d_week_seq#40, d_year#41] + +(19) Filter [codegen id : 5] +Input [2]: [d_week_seq#40, d_year#41] +Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2001)) AND isnotnull(d_week_seq#40)) + +(20) Project [codegen id : 5] +Output [1]: [d_week_seq#40] +Input [2]: [d_week_seq#40, d_year#41] + +(21) BroadcastExchange +Input [1]: [d_week_seq#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#40] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 12] +Output [8]: [d_week_seq#10 AS d_week_seq1#42, sun_sales#33 AS sun_sales1#43, mon_sales#34 AS mon_sales1#44, tue_sales#35 AS tue_sales1#45, wed_sales#36 AS wed_sales1#46, thu_sales#37 AS thu_sales1#47, fri_sales#38 AS fri_sales1#48, sat_sales#39 AS sat_sales1#49] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#40] + +(24) ReusedExchange [Reuses operator id: 15] +Output [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] + +(25) HashAggregate [codegen id : 11] +Input [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_week_seq#57, d_year#58] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 10] +Input [2]: [d_week_seq#57, d_year#58] + +(28) Filter [codegen id : 10] +Input [2]: [d_week_seq#57, d_year#58] +Condition : ((isnotnull(d_year#58) AND (d_year#58 = 2002)) AND isnotnull(d_week_seq#57)) + +(29) Project [codegen id : 10] +Output [1]: [d_week_seq#57] +Input [2]: [d_week_seq#57, d_year#58] + +(30) BroadcastExchange +Input [1]: [d_week_seq#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(31) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#57] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 11] +Output [8]: [d_week_seq#10 AS d_week_seq2#59, sun_sales#33 AS sun_sales2#60, mon_sales#34 AS mon_sales2#61, tue_sales#35 AS tue_sales2#62, wed_sales#36 AS wed_sales2#63, thu_sales#37 AS thu_sales2#64, fri_sales#38 AS fri_sales2#65, sat_sales#39 AS sat_sales2#66] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#57] + +(33) BroadcastExchange +Input [8]: [d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq1#42] +Right keys [1]: [(d_week_seq2#59 - 53)] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 12] +Output [8]: [d_week_seq1#42, round((sun_sales1#43 / sun_sales2#60), 2) AS round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1#44 / mon_sales2#61), 2) AS round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1#45 / tue_sales2#62), 2) AS round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1#46 / wed_sales2#63), 2) AS round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1#47 / thu_sales2#64), 2) AS round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1#48 / fri_sales2#65), 2) AS round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1#49 / sat_sales2#66), 2) AS round((sat_sales1 / sat_sales2), 2)#73] +Input [16]: [d_week_seq1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] + +(36) Exchange +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: rangepartitioning(d_week_seq1#42 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(37) Sort [codegen id : 13] +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: [d_week_seq1#42 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1b34ac798b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_datafusion/simplified.txt @@ -0,0 +1,59 @@ +WholeStageCodegen (13) + Sort [d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #1 + WholeStageCodegen (12) + Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (11) + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..72bee887d4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/explain.txt @@ -0,0 +1,212 @@ +== Physical Plan == +* Sort (37) ++- Exchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- Union (7) + : : : :- * Project (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- * Project (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.catalog_sales (4) + : : +- BroadcastExchange (11) + : : +- * Filter (10) + : : +- * ColumnarToRow (9) + : : +- Scan parquet spark_catalog.default.date_dim (8) + : +- BroadcastExchange (21) + : +- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.date_dim (17) + +- BroadcastExchange (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * HashAggregate (25) + : +- ReusedExchange (24) + +- BroadcastExchange (30) + +- * Project (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet spark_catalog.default.date_dim (26) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#2)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] + +(3) Project [codegen id : 1] +Output [2]: [ws_sold_date_sk#2 AS sold_date_sk#3, ws_ext_sales_price#1 AS sales_price#4] +Input [2]: [ws_ext_sales_price#1, ws_sold_date_sk#2] + +(4) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#6)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] + +(6) Project [codegen id : 2] +Output [2]: [cs_sold_date_sk#6 AS sold_date_sk#7, cs_ext_sales_price#5 AS sales_price#8] +Input [2]: [cs_ext_sales_price#5, cs_sold_date_sk#6] + +(7) Union + +(8) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(9) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(10) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(11) BroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(12) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 4] +Output [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] + +(14) HashAggregate [codegen id : 4] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] + +(15) Exchange +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(16) HashAggregate [codegen id : 12] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] + +(17) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_week_seq#40, d_year#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [2]: [d_week_seq#40, d_year#41] + +(19) Filter [codegen id : 5] +Input [2]: [d_week_seq#40, d_year#41] +Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2001)) AND isnotnull(d_week_seq#40)) + +(20) Project [codegen id : 5] +Output [1]: [d_week_seq#40] +Input [2]: [d_week_seq#40, d_year#41] + +(21) BroadcastExchange +Input [1]: [d_week_seq#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#40] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 12] +Output [8]: [d_week_seq#10 AS d_week_seq1#42, sun_sales#33 AS sun_sales1#43, mon_sales#34 AS mon_sales1#44, tue_sales#35 AS tue_sales1#45, wed_sales#36 AS wed_sales1#46, thu_sales#37 AS thu_sales1#47, fri_sales#38 AS fri_sales1#48, sat_sales#39 AS sat_sales1#49] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#40] + +(24) ReusedExchange [Reuses operator id: 15] +Output [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] + +(25) HashAggregate [codegen id : 11] +Input [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_week_seq#57, d_year#58] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 10] +Input [2]: [d_week_seq#57, d_year#58] + +(28) Filter [codegen id : 10] +Input [2]: [d_week_seq#57, d_year#58] +Condition : ((isnotnull(d_year#58) AND (d_year#58 = 2002)) AND isnotnull(d_week_seq#57)) + +(29) Project [codegen id : 10] +Output [1]: [d_week_seq#57] +Input [2]: [d_week_seq#57, d_year#58] + +(30) BroadcastExchange +Input [1]: [d_week_seq#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(31) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#57] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 11] +Output [8]: [d_week_seq#10 AS d_week_seq2#59, sun_sales#33 AS sun_sales2#60, mon_sales#34 AS mon_sales2#61, tue_sales#35 AS tue_sales2#62, wed_sales#36 AS wed_sales2#63, thu_sales#37 AS thu_sales2#64, fri_sales#38 AS fri_sales2#65, sat_sales#39 AS sat_sales2#66] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#57] + +(33) BroadcastExchange +Input [8]: [d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq1#42] +Right keys [1]: [(d_week_seq2#59 - 53)] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 12] +Output [8]: [d_week_seq1#42, round((sun_sales1#43 / sun_sales2#60), 2) AS round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1#44 / mon_sales2#61), 2) AS round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1#45 / tue_sales2#62), 2) AS round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1#46 / wed_sales2#63), 2) AS round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1#47 / thu_sales2#64), 2) AS round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1#48 / fri_sales2#65), 2) AS round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1#49 / sat_sales2#66), 2) AS round((sat_sales1 / sat_sales2), 2)#73] +Input [16]: [d_week_seq1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] + +(36) Exchange +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: rangepartitioning(d_week_seq1#42 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(37) Sort [codegen id : 13] +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: [d_week_seq1#42 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1b34ac798b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.native_iceberg_compat/simplified.txt @@ -0,0 +1,59 @@ +WholeStageCodegen (13) + Sort [d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #1 + WholeStageCodegen (12) + Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (11) + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/explain.txt new file mode 100644 index 0000000000..96183beea0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(20) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bcc94f0169 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_datafusion/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..96183beea0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(20) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bcc94f0169 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.native_iceberg_compat/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/explain.txt new file mode 100644 index 0000000000..7d51ef43a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/explain.txt @@ -0,0 +1,159 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Filter (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.inventory (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.warehouse (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.item (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.date_dim (17) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_warehouse_sk#2) AND isnotnull(inv_item_sk#1)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] +Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#5, w_warehouse_name#6] + +(10) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] + +(12) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Condition : (((isnotnull(i_current_price#9) AND (i_current_price#9 >= 0.99)) AND (i_current_price#9 <= 1.49)) AND isnotnull(i_item_sk#7)) + +(13) Project [codegen id : 2] +Output [2]: [i_item_sk#7, i_item_id#8] +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] + +(14) BroadcastExchange +Input [2]: [i_item_sk#7, i_item_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] +Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_sk#7, i_item_id#8] + +(17) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_date#11] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 2000-02-10)) AND (d_date#11 <= 2000-04-10)) AND isnotnull(d_date_sk#10)) + +(20) BroadcastExchange +Input [2]: [d_date_sk#10, d_date#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8, d_date_sk#10, d_date#11] + +(23) HashAggregate [codegen id : 4] +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [partial_sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum#12, sum#13] +Results [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] + +(24) Exchange +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#16, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17] +Results [4]: [w_warehouse_name#6, i_item_id#8, sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#16 AS inv_before#18, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17 AS inv_after#19] + +(26) Filter [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] +Condition : (CASE WHEN (inv_before#18 > 0) THEN ((cast(inv_after#19 as double) / cast(inv_before#18 as double)) >= 0.666667) END AND CASE WHEN (inv_before#18 > 0) THEN ((cast(inv_after#19 as double) / cast(inv_before#18 as double)) <= 1.5) END) + +(27) TakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] +Arguments: 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f09a8ad603 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + WholeStageCodegen (5) + Filter [inv_before,inv_after] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date < 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),inv_before,inv_after,sum,sum] + InputAdapter + Exchange [w_warehouse_name,i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,i_item_id,d_date,inv_quantity_on_hand] [sum,sum,sum,sum] + Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_warehouse_sk,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7d51ef43a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/explain.txt @@ -0,0 +1,159 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * Filter (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.inventory (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.warehouse (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.item (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.date_dim (17) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_warehouse_sk#2) AND isnotnull(inv_item_sk#1)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6] +Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#5, w_warehouse_name#6] + +(10) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] + +(12) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] +Condition : (((isnotnull(i_current_price#9) AND (i_current_price#9 >= 0.99)) AND (i_current_price#9 <= 1.49)) AND isnotnull(i_item_sk#7)) + +(13) Project [codegen id : 2] +Output [2]: [i_item_sk#7, i_item_id#8] +Input [3]: [i_item_sk#7, i_item_id#8, i_current_price#9] + +(14) BroadcastExchange +Input [2]: [i_item_sk#7, i_item_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8] +Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_sk#7, i_item_id#8] + +(17) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_date#11] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 2000-02-10)) AND (d_date#11 <= 2000-04-10)) AND isnotnull(d_date_sk#10)) + +(20) BroadcastExchange +Input [2]: [d_date_sk#10, d_date#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#6, i_item_id#8, d_date_sk#10, d_date#11] + +(23) HashAggregate [codegen id : 4] +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#6, i_item_id#8, d_date#11] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [partial_sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum#12, sum#13] +Results [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] + +(24) Exchange +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#8, sum#14, sum#15] +Keys [2]: [w_warehouse_name#6, i_item_id#8] +Functions [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#16, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17] +Results [4]: [w_warehouse_name#6, i_item_id#8, sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#16 AS inv_before#18, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17 AS inv_after#19] + +(26) Filter [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] +Condition : (CASE WHEN (inv_before#18 > 0) THEN ((cast(inv_after#19 as double) / cast(inv_before#18 as double)) >= 0.666667) END AND CASE WHEN (inv_before#18 > 0) THEN ((cast(inv_after#19 as double) / cast(inv_before#18 as double)) <= 1.5) END) + +(27) TakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] +Arguments: 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#8, inv_before#18, inv_after#19] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f09a8ad603 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + WholeStageCodegen (5) + Filter [inv_before,inv_after] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date < 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN inv_quantity_on_hand ELSE 0 END),inv_before,inv_after,sum,sum] + InputAdapter + Exchange [w_warehouse_name,i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,i_item_id,d_date,inv_quantity_on_hand] [sum,sum,sum,sum] + Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,w_warehouse_name,i_item_id] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_quantity_on_hand,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_warehouse_sk,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/explain.txt new file mode 100644 index 0000000000..013ede591c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/explain.txt @@ -0,0 +1,159 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.warehouse (17) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(17) Scan parquet spark_catalog.default.warehouse +Output [1]: [w_warehouse_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#12] + +(19) Filter [codegen id : 3] +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] + +(23) Expand [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] + +(24) HashAggregate [codegen id : 4] +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [sum#18, count#19] +Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] + +(25) Exchange +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#22] +Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#3)#22 AS qoh#23] + +(27) TakeOrderedAndProject +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] +Arguments: 100, [qoh#23 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7346844cad --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..013ede591c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/explain.txt @@ -0,0 +1,159 @@ +== Physical Plan == +TakeOrderedAndProject (27) ++- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.warehouse (17) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(17) Scan parquet spark_catalog.default.warehouse +Output [1]: [w_warehouse_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#12] + +(19) Filter [codegen id : 3] +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] + +(23) Expand [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] +Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] + +(24) HashAggregate [codegen id : 4] +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [sum#18, count#19] +Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] + +(25) Exchange +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#22] +Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#3)#22 AS qoh#23] + +(27) TakeOrderedAndProject +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] +Arguments: 100, [qoh#23 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7346844cad --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/explain.txt new file mode 100644 index 0000000000..886ec7c698 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/explain.txt @@ -0,0 +1,547 @@ +== Physical Plan == +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- Union (71) + :- * Project (53) + : +- * BroadcastHashJoin Inner BuildRight (52) + : :- * Project (46) + : : +- * SortMergeJoin LeftSemi (45) + : : :- * Sort (28) + : : : +- Exchange (27) + : : : +- * Project (26) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (25) + : : : :- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * Filter (22) + : : : +- * HashAggregate (21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : :- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * Filter (5) + : : : : : +- * ColumnarToRow (4) + : : : : : +- Scan parquet spark_catalog.default.store_sales (3) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (16) + : : : +- * Filter (15) + : : : +- * ColumnarToRow (14) + : : : +- Scan parquet spark_catalog.default.item (13) + : : +- * Sort (44) + : : +- * Project (43) + : : +- * Filter (42) + : : +- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- * Project (38) + : : +- * BroadcastHashJoin Inner BuildRight (37) + : : :- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.store_sales (29) + : : +- BroadcastExchange (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet spark_catalog.default.customer (33) + : +- BroadcastExchange (51) + : +- * Project (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet spark_catalog.default.date_dim (47) + +- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (67) + : +- * SortMergeJoin LeftSemi (66) + : :- * Sort (60) + : : +- Exchange (59) + : : +- * Project (58) + : : +- * BroadcastHashJoin LeftSemi BuildRight (57) + : : :- * ColumnarToRow (55) + : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : +- ReusedExchange (56) + : +- * Sort (65) + : +- * Project (64) + : +- * Filter (63) + : +- * HashAggregate (62) + : +- ReusedExchange (61) + +- ReusedExchange (68) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(3) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(4) ColumnarToRow [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] + +(5) Filter [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(10) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 3] +Output [2]: [ss_item_sk#6, d_date#9] +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] + +(13) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#11, i_item_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] + +(15) Filter [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(16) BroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] + +(19) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] + +(20) Exchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 4] +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [2]: [i_item_sk#11 AS item_sk#17, count(1)#16 AS cnt#18] + +(22) Filter [codegen id : 4] +Input [2]: [item_sk#17, cnt#18] +Condition : (cnt#18 > 4) + +(23) Project [codegen id : 4] +Output [1]: [item_sk#17] +Input [2]: [item_sk#17, cnt#18] + +(24) BroadcastExchange +Input [1]: [item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(26) Project [codegen id : 5] +Output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(27) Exchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 + +(29) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(31) Filter [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Condition : isnotnull(ss_customer_sk#19) + +(32) Project [codegen id : 8] +Output [3]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(33) Scan parquet spark_catalog.default.customer +Output [1]: [c_customer_sk#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [1]: [c_customer_sk#23] + +(35) Filter [codegen id : 7] +Input [1]: [c_customer_sk#23] +Condition : isnotnull(c_customer_sk#23) + +(36) BroadcastExchange +Input [1]: [c_customer_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#19] +Right keys [1]: [c_customer_sk#23] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 8] +Output [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, c_customer_sk#23] + +(39) HashAggregate [codegen id : 8] +Input [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Keys [1]: [c_customer_sk#23] +Functions [1]: [partial_sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [2]: [sum#24, isEmpty#25] +Results [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(40) Exchange +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Arguments: hashpartitioning(c_customer_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(41) HashAggregate [codegen id : 9] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(42) Filter [codegen id : 9] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#30, [id=#31]))) + +(43) Project [codegen id : 9] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(44) Sort [codegen id : 9] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 11] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(46) Project [codegen id : 11] +Output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(47) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#32, d_year#33, d_moy#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 10] +Input [3]: [d_date_sk#32, d_year#33, d_moy#34] + +(49) Filter [codegen id : 10] +Input [3]: [d_date_sk#32, d_year#33, d_moy#34] +Condition : ((((isnotnull(d_year#33) AND isnotnull(d_moy#34)) AND (d_year#33 = 2000)) AND (d_moy#34 = 2)) AND isnotnull(d_date_sk#32)) + +(50) Project [codegen id : 10] +Output [1]: [d_date_sk#32] +Input [3]: [d_date_sk#32, d_year#33, d_moy#34] + +(51) BroadcastExchange +Input [1]: [d_date_sk#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [(cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4) AS sales#35] +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#32] + +(54) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#36, ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#40)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 16] +Input [5]: [ws_item_sk#36, ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] + +(56) ReusedExchange [Reuses operator id: 24] +Output [1]: [item_sk#17] + +(57) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [ws_item_sk#36] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(58) Project [codegen id : 16] +Output [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Input [5]: [ws_item_sk#36, ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] + +(59) Exchange +Input [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Arguments: hashpartitioning(ws_bill_customer_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(60) Sort [codegen id : 17] +Input [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Arguments: [ws_bill_customer_sk#37 ASC NULLS FIRST], false, 0 + +(61) ReusedExchange [Reuses operator id: 40] +Output [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(62) HashAggregate [codegen id : 20] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(63) Filter [codegen id : 20] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#30, [id=#31]))) + +(64) Project [codegen id : 20] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(65) Sort [codegen id : 20] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(66) SortMergeJoin [codegen id : 22] +Left keys [1]: [ws_bill_customer_sk#37] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(67) Project [codegen id : 22] +Output [3]: [ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Input [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] + +(68) ReusedExchange [Reuses operator id: 51] +Output [1]: [d_date_sk#41] + +(69) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#41] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 22] +Output [1]: [(cast(ws_quantity#38 as decimal(10,0)) * ws_list_price#39) AS sales#42] +Input [4]: [ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40, d_date_sk#41] + +(71) Union + +(72) HashAggregate [codegen id : 23] +Input [1]: [sales#35] +Keys: [] +Functions [1]: [partial_sum(sales#35)] +Aggregate Attributes [2]: [sum#43, isEmpty#44] +Results [2]: [sum#45, isEmpty#46] + +(73) Exchange +Input [2]: [sum#45, isEmpty#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(74) HashAggregate [codegen id : 24] +Input [2]: [sum#45, isEmpty#46] +Keys: [] +Functions [1]: [sum(sales#35)] +Aggregate Attributes [1]: [sum(sales#35)#47] +Results [1]: [sum(sales#35)#47 AS sum(sales)#48] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (77) + : : +- * ColumnarToRow (76) + : : +- Scan parquet spark_catalog.default.store_sales (75) + : +- ReusedExchange (78) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet spark_catalog.default.date_dim (81) + + +(75) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#52)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 3] +Input [4]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52] + +(77) Filter [codegen id : 3] +Input [4]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52] +Condition : isnotnull(ss_customer_sk#49) + +(78) ReusedExchange [Reuses operator id: 36] +Output [1]: [c_customer_sk#53] + +(79) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#49] +Right keys [1]: [c_customer_sk#53] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 3] +Output [4]: [ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52, c_customer_sk#53] +Input [5]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52, c_customer_sk#53] + +(81) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#54, d_year#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#54, d_year#55] + +(83) Filter [codegen id : 2] +Input [2]: [d_date_sk#54, d_year#55] +Condition : (d_year#55 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#54)) + +(84) Project [codegen id : 2] +Output [1]: [d_date_sk#54] +Input [2]: [d_date_sk#54, d_year#55] + +(85) BroadcastExchange +Input [1]: [d_date_sk#54] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] + +(86) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#52] +Right keys [1]: [d_date_sk#54] +Join type: Inner +Join condition: None + +(87) Project [codegen id : 3] +Output [3]: [ss_quantity#50, ss_sales_price#51, c_customer_sk#53] +Input [5]: [ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52, c_customer_sk#53, d_date_sk#54] + +(88) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#50, ss_sales_price#51, c_customer_sk#53] +Keys [1]: [c_customer_sk#53] +Functions [1]: [partial_sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))] +Aggregate Attributes [2]: [sum#56, isEmpty#57] +Results [3]: [c_customer_sk#53, sum#58, isEmpty#59] + +(89) Exchange +Input [3]: [c_customer_sk#53, sum#58, isEmpty#59] +Arguments: hashpartitioning(c_customer_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(90) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#53, sum#58, isEmpty#59] +Keys [1]: [c_customer_sk#53] +Functions [1]: [sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))#60] +Results [1]: [sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))#60 AS csales#61] + +(91) HashAggregate [codegen id : 4] +Input [1]: [csales#61] +Keys: [] +Functions [1]: [partial_max(csales#61)] +Aggregate Attributes [1]: [max#62] +Results [1]: [max#63] + +(92) Exchange +Input [1]: [max#63] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(93) HashAggregate [codegen id : 5] +Input [1]: [max#63] +Keys: [] +Functions [1]: [max(csales#61)] +Aggregate Attributes [1]: [max(csales#61)#64] +Results [1]: [max(csales#61)#64 AS tpcds_cmax#65] + +Subquery:2 Hosting operator id = 63 Hosting Expression = ReusedSubquery Subquery scalar-subquery#30, [id=#31] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a7ccacb8b9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_datafusion/simplified.txt @@ -0,0 +1,148 @@ +WholeStageCodegen (24) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (23) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (11) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (5) + Project [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [item_sk] + Filter [cnt] + HashAggregate [_groupingexpression,i_item_sk,d_date,count] [count(1),item_sk,cnt,count] + InputAdapter + Exchange [_groupingexpression,i_item_sk,d_date] #4 + WholeStageCodegen (3) + HashAggregate [_groupingexpression,i_item_sk,d_date] [count,count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (8) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (10) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (22) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (17) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #13 + WholeStageCodegen (16) + Project [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + BroadcastHashJoin [ws_item_sk,item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [item_sk] #3 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..886ec7c698 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/explain.txt @@ -0,0 +1,547 @@ +== Physical Plan == +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- Union (71) + :- * Project (53) + : +- * BroadcastHashJoin Inner BuildRight (52) + : :- * Project (46) + : : +- * SortMergeJoin LeftSemi (45) + : : :- * Sort (28) + : : : +- Exchange (27) + : : : +- * Project (26) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (25) + : : : :- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * Filter (22) + : : : +- * HashAggregate (21) + : : : +- Exchange (20) + : : : +- * HashAggregate (19) + : : : +- * Project (18) + : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : :- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * Filter (5) + : : : : : +- * ColumnarToRow (4) + : : : : : +- Scan parquet spark_catalog.default.store_sales (3) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (16) + : : : +- * Filter (15) + : : : +- * ColumnarToRow (14) + : : : +- Scan parquet spark_catalog.default.item (13) + : : +- * Sort (44) + : : +- * Project (43) + : : +- * Filter (42) + : : +- * HashAggregate (41) + : : +- Exchange (40) + : : +- * HashAggregate (39) + : : +- * Project (38) + : : +- * BroadcastHashJoin Inner BuildRight (37) + : : :- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.store_sales (29) + : : +- BroadcastExchange (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet spark_catalog.default.customer (33) + : +- BroadcastExchange (51) + : +- * Project (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet spark_catalog.default.date_dim (47) + +- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (67) + : +- * SortMergeJoin LeftSemi (66) + : :- * Sort (60) + : : +- Exchange (59) + : : +- * Project (58) + : : +- * BroadcastHashJoin LeftSemi BuildRight (57) + : : :- * ColumnarToRow (55) + : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : +- ReusedExchange (56) + : +- * Sort (65) + : +- * Project (64) + : +- * Filter (63) + : +- * HashAggregate (62) + : +- ReusedExchange (61) + +- ReusedExchange (68) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(3) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(4) ColumnarToRow [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] + +(5) Filter [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(10) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 3] +Output [2]: [ss_item_sk#6, d_date#9] +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] + +(13) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#11, i_item_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] + +(15) Filter [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(16) BroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] + +(19) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] + +(20) Exchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 4] +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [2]: [i_item_sk#11 AS item_sk#17, count(1)#16 AS cnt#18] + +(22) Filter [codegen id : 4] +Input [2]: [item_sk#17, cnt#18] +Condition : (cnt#18 > 4) + +(23) Project [codegen id : 4] +Output [1]: [item_sk#17] +Input [2]: [item_sk#17, cnt#18] + +(24) BroadcastExchange +Input [1]: [item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(26) Project [codegen id : 5] +Output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(27) Exchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 + +(29) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(31) Filter [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Condition : isnotnull(ss_customer_sk#19) + +(32) Project [codegen id : 8] +Output [3]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(33) Scan parquet spark_catalog.default.customer +Output [1]: [c_customer_sk#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [1]: [c_customer_sk#23] + +(35) Filter [codegen id : 7] +Input [1]: [c_customer_sk#23] +Condition : isnotnull(c_customer_sk#23) + +(36) BroadcastExchange +Input [1]: [c_customer_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#19] +Right keys [1]: [c_customer_sk#23] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 8] +Output [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, c_customer_sk#23] + +(39) HashAggregate [codegen id : 8] +Input [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Keys [1]: [c_customer_sk#23] +Functions [1]: [partial_sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [2]: [sum#24, isEmpty#25] +Results [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(40) Exchange +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Arguments: hashpartitioning(c_customer_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(41) HashAggregate [codegen id : 9] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(42) Filter [codegen id : 9] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#30, [id=#31]))) + +(43) Project [codegen id : 9] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(44) Sort [codegen id : 9] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin [codegen id : 11] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(46) Project [codegen id : 11] +Output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(47) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#32, d_year#33, d_moy#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 10] +Input [3]: [d_date_sk#32, d_year#33, d_moy#34] + +(49) Filter [codegen id : 10] +Input [3]: [d_date_sk#32, d_year#33, d_moy#34] +Condition : ((((isnotnull(d_year#33) AND isnotnull(d_moy#34)) AND (d_year#33 = 2000)) AND (d_moy#34 = 2)) AND isnotnull(d_date_sk#32)) + +(50) Project [codegen id : 10] +Output [1]: [d_date_sk#32] +Input [3]: [d_date_sk#32, d_year#33, d_moy#34] + +(51) BroadcastExchange +Input [1]: [d_date_sk#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [(cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4) AS sales#35] +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#32] + +(54) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#36, ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#40)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 16] +Input [5]: [ws_item_sk#36, ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] + +(56) ReusedExchange [Reuses operator id: 24] +Output [1]: [item_sk#17] + +(57) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [ws_item_sk#36] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(58) Project [codegen id : 16] +Output [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Input [5]: [ws_item_sk#36, ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] + +(59) Exchange +Input [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Arguments: hashpartitioning(ws_bill_customer_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(60) Sort [codegen id : 17] +Input [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Arguments: [ws_bill_customer_sk#37 ASC NULLS FIRST], false, 0 + +(61) ReusedExchange [Reuses operator id: 40] +Output [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(62) HashAggregate [codegen id : 20] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(63) Filter [codegen id : 20] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#30, [id=#31]))) + +(64) Project [codegen id : 20] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(65) Sort [codegen id : 20] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(66) SortMergeJoin [codegen id : 22] +Left keys [1]: [ws_bill_customer_sk#37] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(67) Project [codegen id : 22] +Output [3]: [ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] +Input [4]: [ws_bill_customer_sk#37, ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40] + +(68) ReusedExchange [Reuses operator id: 51] +Output [1]: [d_date_sk#41] + +(69) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#41] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 22] +Output [1]: [(cast(ws_quantity#38 as decimal(10,0)) * ws_list_price#39) AS sales#42] +Input [4]: [ws_quantity#38, ws_list_price#39, ws_sold_date_sk#40, d_date_sk#41] + +(71) Union + +(72) HashAggregate [codegen id : 23] +Input [1]: [sales#35] +Keys: [] +Functions [1]: [partial_sum(sales#35)] +Aggregate Attributes [2]: [sum#43, isEmpty#44] +Results [2]: [sum#45, isEmpty#46] + +(73) Exchange +Input [2]: [sum#45, isEmpty#46] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(74) HashAggregate [codegen id : 24] +Input [2]: [sum#45, isEmpty#46] +Keys: [] +Functions [1]: [sum(sales#35)] +Aggregate Attributes [1]: [sum(sales#35)#47] +Results [1]: [sum(sales#35)#47 AS sum(sales)#48] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (77) + : : +- * ColumnarToRow (76) + : : +- Scan parquet spark_catalog.default.store_sales (75) + : +- ReusedExchange (78) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet spark_catalog.default.date_dim (81) + + +(75) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#52)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 3] +Input [4]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52] + +(77) Filter [codegen id : 3] +Input [4]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52] +Condition : isnotnull(ss_customer_sk#49) + +(78) ReusedExchange [Reuses operator id: 36] +Output [1]: [c_customer_sk#53] + +(79) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#49] +Right keys [1]: [c_customer_sk#53] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 3] +Output [4]: [ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52, c_customer_sk#53] +Input [5]: [ss_customer_sk#49, ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52, c_customer_sk#53] + +(81) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#54, d_year#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#54, d_year#55] + +(83) Filter [codegen id : 2] +Input [2]: [d_date_sk#54, d_year#55] +Condition : (d_year#55 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#54)) + +(84) Project [codegen id : 2] +Output [1]: [d_date_sk#54] +Input [2]: [d_date_sk#54, d_year#55] + +(85) BroadcastExchange +Input [1]: [d_date_sk#54] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] + +(86) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#52] +Right keys [1]: [d_date_sk#54] +Join type: Inner +Join condition: None + +(87) Project [codegen id : 3] +Output [3]: [ss_quantity#50, ss_sales_price#51, c_customer_sk#53] +Input [5]: [ss_quantity#50, ss_sales_price#51, ss_sold_date_sk#52, c_customer_sk#53, d_date_sk#54] + +(88) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#50, ss_sales_price#51, c_customer_sk#53] +Keys [1]: [c_customer_sk#53] +Functions [1]: [partial_sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))] +Aggregate Attributes [2]: [sum#56, isEmpty#57] +Results [3]: [c_customer_sk#53, sum#58, isEmpty#59] + +(89) Exchange +Input [3]: [c_customer_sk#53, sum#58, isEmpty#59] +Arguments: hashpartitioning(c_customer_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(90) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#53, sum#58, isEmpty#59] +Keys [1]: [c_customer_sk#53] +Functions [1]: [sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))#60] +Results [1]: [sum((cast(ss_quantity#50 as decimal(10,0)) * ss_sales_price#51))#60 AS csales#61] + +(91) HashAggregate [codegen id : 4] +Input [1]: [csales#61] +Keys: [] +Functions [1]: [partial_max(csales#61)] +Aggregate Attributes [1]: [max#62] +Results [1]: [max#63] + +(92) Exchange +Input [1]: [max#63] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(93) HashAggregate [codegen id : 5] +Input [1]: [max#63] +Keys: [] +Functions [1]: [max(csales#61)] +Aggregate Attributes [1]: [max(csales#61)#64] +Results [1]: [max(csales#61)#64 AS tpcds_cmax#65] + +Subquery:2 Hosting operator id = 63 Hosting Expression = ReusedSubquery Subquery scalar-subquery#30, [id=#31] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a7ccacb8b9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.native_iceberg_compat/simplified.txt @@ -0,0 +1,148 @@ +WholeStageCodegen (24) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (23) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (11) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (5) + Project [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [item_sk] + Filter [cnt] + HashAggregate [_groupingexpression,i_item_sk,d_date,count] [count(1),item_sk,cnt,count] + InputAdapter + Exchange [_groupingexpression,i_item_sk,d_date] #4 + WholeStageCodegen (3) + HashAggregate [_groupingexpression,i_item_sk,d_date] [count,count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (8) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (10) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (22) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (17) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #13 + WholeStageCodegen (16) + Project [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + BroadcastHashJoin [ws_item_sk,item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [item_sk] #3 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #12 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/explain.txt new file mode 100644 index 0000000000..fd5c4ba2e7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/explain.txt @@ -0,0 +1,671 @@ +== Physical Plan == +TakeOrderedAndProject (95) ++- Union (94) + :- * HashAggregate (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- * Project (67) + : +- * BroadcastHashJoin Inner BuildRight (66) + : :- * Project (60) + : : +- * BroadcastHashJoin Inner BuildRight (59) + : : :- * SortMergeJoin LeftSemi (46) + : : : :- * Sort (29) + : : : : +- Exchange (28) + : : : : +- * Project (27) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- BroadcastExchange (25) + : : : : +- * Project (24) + : : : : +- * Filter (23) + : : : : +- * HashAggregate (22) + : : : : +- Exchange (21) + : : : : +- * HashAggregate (20) + : : : : +- * Project (19) + : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : :- * Project (13) + : : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : : :- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : : +- BroadcastExchange (11) + : : : : : +- * Project (10) + : : : : : +- * Filter (9) + : : : : : +- * ColumnarToRow (8) + : : : : : +- Scan parquet spark_catalog.default.date_dim (7) + : : : : +- BroadcastExchange (17) + : : : : +- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet spark_catalog.default.item (14) + : : : +- * Sort (45) + : : : +- * Project (44) + : : : +- * Filter (43) + : : : +- * HashAggregate (42) + : : : +- Exchange (41) + : : : +- * HashAggregate (40) + : : : +- * Project (39) + : : : +- * BroadcastHashJoin Inner BuildRight (38) + : : : :- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet spark_catalog.default.store_sales (30) + : : : +- BroadcastExchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet spark_catalog.default.customer (34) + : : +- BroadcastExchange (58) + : : +- * SortMergeJoin LeftSemi (57) + : : :- * Sort (51) + : : : +- Exchange (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet spark_catalog.default.customer (47) + : : +- * Sort (56) + : : +- * Project (55) + : : +- * Filter (54) + : : +- * HashAggregate (53) + : : +- ReusedExchange (52) + : +- BroadcastExchange (65) + : +- * Project (64) + : +- * Filter (63) + : +- * ColumnarToRow (62) + : +- Scan parquet spark_catalog.default.date_dim (61) + +- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- * Project (90) + +- * BroadcastHashJoin Inner BuildRight (89) + :- * Project (87) + : +- * BroadcastHashJoin Inner BuildRight (86) + : :- * SortMergeJoin LeftSemi (84) + : : :- * Sort (78) + : : : +- Exchange (77) + : : : +- * Project (76) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet spark_catalog.default.web_sales (71) + : : : +- ReusedExchange (74) + : : +- * Sort (83) + : : +- * Project (82) + : : +- * Filter (81) + : : +- * HashAggregate (80) + : : +- ReusedExchange (79) + : +- ReusedExchange (85) + +- ReusedExchange (88) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_bill_customer_sk#1) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(7) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(11) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(12) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 3] +Output [2]: [ss_item_sk#6, d_date#9] +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] + +(14) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#11, i_item_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] + +(16) Filter [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(17) BroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] + +(20) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] + +(21) Exchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(22) HashAggregate [codegen id : 4] +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [2]: [i_item_sk#11 AS item_sk#17, count(1)#16 AS cnt#18] + +(23) Filter [codegen id : 4] +Input [2]: [item_sk#17, cnt#18] +Condition : (cnt#18 > 4) + +(24) Project [codegen id : 4] +Output [1]: [item_sk#17] +Input [2]: [item_sk#17, cnt#18] + +(25) BroadcastExchange +Input [1]: [item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(27) Project [codegen id : 5] +Output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(28) Exchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 6] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(32) Filter [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Condition : isnotnull(ss_customer_sk#19) + +(33) Project [codegen id : 8] +Output [3]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(34) Scan parquet spark_catalog.default.customer +Output [1]: [c_customer_sk#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [1]: [c_customer_sk#23] + +(36) Filter [codegen id : 7] +Input [1]: [c_customer_sk#23] +Condition : isnotnull(c_customer_sk#23) + +(37) BroadcastExchange +Input [1]: [c_customer_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#19] +Right keys [1]: [c_customer_sk#23] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, c_customer_sk#23] + +(40) HashAggregate [codegen id : 8] +Input [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Keys [1]: [c_customer_sk#23] +Functions [1]: [partial_sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [2]: [sum#24, isEmpty#25] +Results [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(41) Exchange +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Arguments: hashpartitioning(c_customer_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 9] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(43) Filter [codegen id : 9] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#30, [id=#31]))) + +(44) Project [codegen id : 9] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(45) Sort [codegen id : 9] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin [codegen id : 17] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(47) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 10] +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] + +(49) Filter [codegen id : 10] +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Condition : isnotnull(c_customer_sk#32) + +(50) Exchange +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Arguments: hashpartitioning(c_customer_sk#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) Sort [codegen id : 11] +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Arguments: [c_customer_sk#32 ASC NULLS FIRST], false, 0 + +(52) ReusedExchange [Reuses operator id: 41] +Output [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(53) HashAggregate [codegen id : 14] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(54) Filter [codegen id : 14] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#30, [id=#31]))) + +(55) Project [codegen id : 14] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(56) Sort [codegen id : 14] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin [codegen id : 15] +Left keys [1]: [c_customer_sk#32] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(58) BroadcastExchange +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#32] +Join type: Inner +Join condition: None + +(60) Project [codegen id : 17] +Output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#33, c_last_name#34] +Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#32, c_first_name#33, c_last_name#34] + +(61) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#35, d_year#36, d_moy#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 16] +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] + +(63) Filter [codegen id : 16] +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2000)) AND (d_moy#37 = 2)) AND isnotnull(d_date_sk#35)) + +(64) Project [codegen id : 16] +Output [1]: [d_date_sk#35] +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] + +(65) BroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(66) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#35] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 17] +Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#33, c_last_name#34] +Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#33, c_last_name#34, d_date_sk#35] + +(68) HashAggregate [codegen id : 17] +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#33, c_last_name#34] +Keys [2]: [c_last_name#34, c_first_name#33] +Functions [1]: [partial_sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] +Aggregate Attributes [2]: [sum#38, isEmpty#39] +Results [4]: [c_last_name#34, c_first_name#33, sum#40, isEmpty#41] + +(69) Exchange +Input [4]: [c_last_name#34, c_first_name#33, sum#40, isEmpty#41] +Arguments: hashpartitioning(c_last_name#34, c_first_name#33, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(70) HashAggregate [codegen id : 18] +Input [4]: [c_last_name#34, c_first_name#33, sum#40, isEmpty#41] +Keys [2]: [c_last_name#34, c_first_name#33] +Functions [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] +Aggregate Attributes [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))#42] +Results [3]: [c_last_name#34, c_first_name#33, sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))#42 AS sales#43] + +(71) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#48)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 23] +Input [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] + +(73) Filter [codegen id : 23] +Input [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Condition : isnotnull(ws_bill_customer_sk#45) + +(74) ReusedExchange [Reuses operator id: 25] +Output [1]: [item_sk#17] + +(75) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ws_item_sk#44] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(76) Project [codegen id : 23] +Output [4]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Input [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] + +(77) Exchange +Input [4]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Arguments: hashpartitioning(ws_bill_customer_sk#45, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(78) Sort [codegen id : 24] +Input [4]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Arguments: [ws_bill_customer_sk#45 ASC NULLS FIRST], false, 0 + +(79) ReusedExchange [Reuses operator id: 41] +Output [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(80) HashAggregate [codegen id : 27] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(81) Filter [codegen id : 27] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#30, [id=#31]))) + +(82) Project [codegen id : 27] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(83) Sort [codegen id : 27] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 35] +Left keys [1]: [ws_bill_customer_sk#45] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(85) ReusedExchange [Reuses operator id: 58] +Output [3]: [c_customer_sk#49, c_first_name#50, c_last_name#51] + +(86) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ws_bill_customer_sk#45] +Right keys [1]: [c_customer_sk#49] +Join type: Inner +Join condition: None + +(87) Project [codegen id : 35] +Output [5]: [ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48, c_first_name#50, c_last_name#51] +Input [7]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48, c_customer_sk#49, c_first_name#50, c_last_name#51] + +(88) ReusedExchange [Reuses operator id: 65] +Output [1]: [d_date_sk#52] + +(89) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ws_sold_date_sk#48] +Right keys [1]: [d_date_sk#52] +Join type: Inner +Join condition: None + +(90) Project [codegen id : 35] +Output [4]: [ws_quantity#46, ws_list_price#47, c_first_name#50, c_last_name#51] +Input [6]: [ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48, c_first_name#50, c_last_name#51, d_date_sk#52] + +(91) HashAggregate [codegen id : 35] +Input [4]: [ws_quantity#46, ws_list_price#47, c_first_name#50, c_last_name#51] +Keys [2]: [c_last_name#51, c_first_name#50] +Functions [1]: [partial_sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))] +Aggregate Attributes [2]: [sum#53, isEmpty#54] +Results [4]: [c_last_name#51, c_first_name#50, sum#55, isEmpty#56] + +(92) Exchange +Input [4]: [c_last_name#51, c_first_name#50, sum#55, isEmpty#56] +Arguments: hashpartitioning(c_last_name#51, c_first_name#50, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(93) HashAggregate [codegen id : 36] +Input [4]: [c_last_name#51, c_first_name#50, sum#55, isEmpty#56] +Keys [2]: [c_last_name#51, c_first_name#50] +Functions [1]: [sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))] +Aggregate Attributes [1]: [sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))#57] +Results [3]: [c_last_name#51, c_first_name#50, sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))#57 AS sales#58] + +(94) Union + +(95) TakeOrderedAndProject +Input [3]: [c_last_name#34, c_first_name#33, sales#43] +Arguments: 100, [c_last_name#34 ASC NULLS FIRST, c_first_name#33 ASC NULLS FIRST, sales#43 ASC NULLS FIRST], [c_last_name#34, c_first_name#33, sales#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 43 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (114) ++- Exchange (113) + +- * HashAggregate (112) + +- * HashAggregate (111) + +- Exchange (110) + +- * HashAggregate (109) + +- * Project (108) + +- * BroadcastHashJoin Inner BuildRight (107) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * Filter (98) + : : +- * ColumnarToRow (97) + : : +- Scan parquet spark_catalog.default.store_sales (96) + : +- ReusedExchange (99) + +- BroadcastExchange (106) + +- * Project (105) + +- * Filter (104) + +- * ColumnarToRow (103) + +- Scan parquet spark_catalog.default.date_dim (102) + + +(96) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#62)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 3] +Input [4]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62] + +(98) Filter [codegen id : 3] +Input [4]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62] +Condition : isnotnull(ss_customer_sk#59) + +(99) ReusedExchange [Reuses operator id: 37] +Output [1]: [c_customer_sk#63] + +(100) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#59] +Right keys [1]: [c_customer_sk#63] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 3] +Output [4]: [ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62, c_customer_sk#63] +Input [5]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62, c_customer_sk#63] + +(102) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#64, d_year#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#64, d_year#65] + +(104) Filter [codegen id : 2] +Input [2]: [d_date_sk#64, d_year#65] +Condition : (d_year#65 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#64)) + +(105) Project [codegen id : 2] +Output [1]: [d_date_sk#64] +Input [2]: [d_date_sk#64, d_year#65] + +(106) BroadcastExchange +Input [1]: [d_date_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] + +(107) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#62] +Right keys [1]: [d_date_sk#64] +Join type: Inner +Join condition: None + +(108) Project [codegen id : 3] +Output [3]: [ss_quantity#60, ss_sales_price#61, c_customer_sk#63] +Input [5]: [ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62, c_customer_sk#63, d_date_sk#64] + +(109) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#60, ss_sales_price#61, c_customer_sk#63] +Keys [1]: [c_customer_sk#63] +Functions [1]: [partial_sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))] +Aggregate Attributes [2]: [sum#66, isEmpty#67] +Results [3]: [c_customer_sk#63, sum#68, isEmpty#69] + +(110) Exchange +Input [3]: [c_customer_sk#63, sum#68, isEmpty#69] +Arguments: hashpartitioning(c_customer_sk#63, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(111) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#63, sum#68, isEmpty#69] +Keys [1]: [c_customer_sk#63] +Functions [1]: [sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))#70] +Results [1]: [sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))#70 AS csales#71] + +(112) HashAggregate [codegen id : 4] +Input [1]: [csales#71] +Keys: [] +Functions [1]: [partial_max(csales#71)] +Aggregate Attributes [1]: [max#72] +Results [1]: [max#73] + +(113) Exchange +Input [1]: [max#73] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(114) HashAggregate [codegen id : 5] +Input [1]: [max#73] +Keys: [] +Functions [1]: [max(csales#71)] +Aggregate Attributes [1]: [max(csales#71)#74] +Results [1]: [max(csales#71)#74 AS tpcds_cmax#75] + +Subquery:2 Hosting operator id = 54 Hosting Expression = ReusedSubquery Subquery scalar-subquery#30, [id=#31] + +Subquery:3 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#30, [id=#31] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8099e5fea6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_datafusion/simplified.txt @@ -0,0 +1,181 @@ +TakeOrderedAndProject [c_last_name,c_first_name,sales] + Union + WholeStageCodegen (18) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),sales,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name] #1 + WholeStageCodegen (17) + HashAggregate [c_last_name,c_first_name,cs_quantity,cs_list_price] [sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_list_price,c_first_name,c_last_name] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (5) + Project [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [item_sk] + Filter [cnt] + HashAggregate [_groupingexpression,i_item_sk,d_date,count] [count(1),item_sk,cnt,count] + InputAdapter + Exchange [_groupingexpression,i_item_sk,d_date] #4 + WholeStageCodegen (3) + HashAggregate [_groupingexpression,i_item_sk,d_date] [count,count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (8) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + SortMergeJoin [c_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #13 + WholeStageCodegen (10) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (36) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),sales,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name] #15 + WholeStageCodegen (35) + HashAggregate [c_last_name,c_first_name,ws_quantity,ws_list_price] [sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_list_price,c_first_name,c_last_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (24) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #16 + WholeStageCodegen (23) + Project [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [item_sk] #3 + InputAdapter + WholeStageCodegen (27) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #12 + InputAdapter + ReusedExchange [d_date_sk] #14 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..fd5c4ba2e7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/explain.txt @@ -0,0 +1,671 @@ +== Physical Plan == +TakeOrderedAndProject (95) ++- Union (94) + :- * HashAggregate (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- * Project (67) + : +- * BroadcastHashJoin Inner BuildRight (66) + : :- * Project (60) + : : +- * BroadcastHashJoin Inner BuildRight (59) + : : :- * SortMergeJoin LeftSemi (46) + : : : :- * Sort (29) + : : : : +- Exchange (28) + : : : : +- * Project (27) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- BroadcastExchange (25) + : : : : +- * Project (24) + : : : : +- * Filter (23) + : : : : +- * HashAggregate (22) + : : : : +- Exchange (21) + : : : : +- * HashAggregate (20) + : : : : +- * Project (19) + : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : :- * Project (13) + : : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : : :- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : : +- BroadcastExchange (11) + : : : : : +- * Project (10) + : : : : : +- * Filter (9) + : : : : : +- * ColumnarToRow (8) + : : : : : +- Scan parquet spark_catalog.default.date_dim (7) + : : : : +- BroadcastExchange (17) + : : : : +- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet spark_catalog.default.item (14) + : : : +- * Sort (45) + : : : +- * Project (44) + : : : +- * Filter (43) + : : : +- * HashAggregate (42) + : : : +- Exchange (41) + : : : +- * HashAggregate (40) + : : : +- * Project (39) + : : : +- * BroadcastHashJoin Inner BuildRight (38) + : : : :- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet spark_catalog.default.store_sales (30) + : : : +- BroadcastExchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet spark_catalog.default.customer (34) + : : +- BroadcastExchange (58) + : : +- * SortMergeJoin LeftSemi (57) + : : :- * Sort (51) + : : : +- Exchange (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet spark_catalog.default.customer (47) + : : +- * Sort (56) + : : +- * Project (55) + : : +- * Filter (54) + : : +- * HashAggregate (53) + : : +- ReusedExchange (52) + : +- BroadcastExchange (65) + : +- * Project (64) + : +- * Filter (63) + : +- * ColumnarToRow (62) + : +- Scan parquet spark_catalog.default.date_dim (61) + +- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- * Project (90) + +- * BroadcastHashJoin Inner BuildRight (89) + :- * Project (87) + : +- * BroadcastHashJoin Inner BuildRight (86) + : :- * SortMergeJoin LeftSemi (84) + : : :- * Sort (78) + : : : +- Exchange (77) + : : : +- * Project (76) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet spark_catalog.default.web_sales (71) + : : : +- ReusedExchange (74) + : : +- * Sort (83) + : : +- * Project (82) + : : +- * Filter (81) + : : +- * HashAggregate (80) + : : +- ReusedExchange (79) + : +- ReusedExchange (85) + +- ReusedExchange (88) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_bill_customer_sk#1) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 3] +Input [2]: [ss_item_sk#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_item_sk#6) + +(7) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(11) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(12) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 3] +Output [2]: [ss_item_sk#6, d_date#9] +Input [4]: [ss_item_sk#6, ss_sold_date_sk#7, d_date_sk#8, d_date#9] + +(14) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#11, i_item_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] + +(16) Filter [codegen id : 2] +Input [2]: [i_item_sk#11, i_item_desc#12] +Condition : isnotnull(i_item_sk#11) + +(17) BroadcastExchange +Input [2]: [i_item_sk#11, i_item_desc#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#11, substr(i_item_desc#12, 1, 30) AS _groupingexpression#13] +Input [4]: [ss_item_sk#6, d_date#9, i_item_sk#11, i_item_desc#12] + +(20) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#11, _groupingexpression#13] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] + +(21) Exchange +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Arguments: hashpartitioning(_groupingexpression#13, i_item_sk#11, d_date#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(22) HashAggregate [codegen id : 4] +Input [4]: [_groupingexpression#13, i_item_sk#11, d_date#9, count#15] +Keys [3]: [_groupingexpression#13, i_item_sk#11, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [2]: [i_item_sk#11 AS item_sk#17, count(1)#16 AS cnt#18] + +(23) Filter [codegen id : 4] +Input [2]: [item_sk#17, cnt#18] +Condition : (cnt#18 > 4) + +(24) Project [codegen id : 4] +Output [1]: [item_sk#17] +Input [2]: [item_sk#17, cnt#18] + +(25) BroadcastExchange +Input [1]: [item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(27) Project [codegen id : 5] +Output [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] + +(28) Exchange +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 6] +Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] +Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(32) Filter [codegen id : 8] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] +Condition : isnotnull(ss_customer_sk#19) + +(33) Project [codegen id : 8] +Output [3]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, ss_sold_date_sk#22] + +(34) Scan parquet spark_catalog.default.customer +Output [1]: [c_customer_sk#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [1]: [c_customer_sk#23] + +(36) Filter [codegen id : 7] +Input [1]: [c_customer_sk#23] +Condition : isnotnull(c_customer_sk#23) + +(37) BroadcastExchange +Input [1]: [c_customer_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#19] +Right keys [1]: [c_customer_sk#23] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Input [4]: [ss_customer_sk#19, ss_quantity#20, ss_sales_price#21, c_customer_sk#23] + +(40) HashAggregate [codegen id : 8] +Input [3]: [ss_quantity#20, ss_sales_price#21, c_customer_sk#23] +Keys [1]: [c_customer_sk#23] +Functions [1]: [partial_sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [2]: [sum#24, isEmpty#25] +Results [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(41) Exchange +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Arguments: hashpartitioning(c_customer_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 9] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(43) Filter [codegen id : 9] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * Subquery scalar-subquery#30, [id=#31]))) + +(44) Project [codegen id : 9] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(45) Sort [codegen id : 9] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin [codegen id : 17] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(47) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 10] +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] + +(49) Filter [codegen id : 10] +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Condition : isnotnull(c_customer_sk#32) + +(50) Exchange +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Arguments: hashpartitioning(c_customer_sk#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) Sort [codegen id : 11] +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Arguments: [c_customer_sk#32 ASC NULLS FIRST], false, 0 + +(52) ReusedExchange [Reuses operator id: 41] +Output [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(53) HashAggregate [codegen id : 14] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(54) Filter [codegen id : 14] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#30, [id=#31]))) + +(55) Project [codegen id : 14] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(56) Sort [codegen id : 14] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(57) SortMergeJoin [codegen id : 15] +Left keys [1]: [c_customer_sk#32] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(58) BroadcastExchange +Input [3]: [c_customer_sk#32, c_first_name#33, c_last_name#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#32] +Join type: Inner +Join condition: None + +(60) Project [codegen id : 17] +Output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#33, c_last_name#34] +Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#32, c_first_name#33, c_last_name#34] + +(61) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#35, d_year#36, d_moy#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 16] +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] + +(63) Filter [codegen id : 16] +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2000)) AND (d_moy#37 = 2)) AND isnotnull(d_date_sk#35)) + +(64) Project [codegen id : 16] +Output [1]: [d_date_sk#35] +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] + +(65) BroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(66) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#35] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 17] +Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#33, c_last_name#34] +Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#33, c_last_name#34, d_date_sk#35] + +(68) HashAggregate [codegen id : 17] +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#33, c_last_name#34] +Keys [2]: [c_last_name#34, c_first_name#33] +Functions [1]: [partial_sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] +Aggregate Attributes [2]: [sum#38, isEmpty#39] +Results [4]: [c_last_name#34, c_first_name#33, sum#40, isEmpty#41] + +(69) Exchange +Input [4]: [c_last_name#34, c_first_name#33, sum#40, isEmpty#41] +Arguments: hashpartitioning(c_last_name#34, c_first_name#33, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(70) HashAggregate [codegen id : 18] +Input [4]: [c_last_name#34, c_first_name#33, sum#40, isEmpty#41] +Keys [2]: [c_last_name#34, c_first_name#33] +Functions [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))] +Aggregate Attributes [1]: [sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))#42] +Results [3]: [c_last_name#34, c_first_name#33, sum((cast(cs_quantity#3 as decimal(10,0)) * cs_list_price#4))#42 AS sales#43] + +(71) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#48)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 23] +Input [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] + +(73) Filter [codegen id : 23] +Input [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Condition : isnotnull(ws_bill_customer_sk#45) + +(74) ReusedExchange [Reuses operator id: 25] +Output [1]: [item_sk#17] + +(75) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ws_item_sk#44] +Right keys [1]: [item_sk#17] +Join type: LeftSemi +Join condition: None + +(76) Project [codegen id : 23] +Output [4]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Input [5]: [ws_item_sk#44, ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] + +(77) Exchange +Input [4]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Arguments: hashpartitioning(ws_bill_customer_sk#45, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(78) Sort [codegen id : 24] +Input [4]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48] +Arguments: [ws_bill_customer_sk#45 ASC NULLS FIRST], false, 0 + +(79) ReusedExchange [Reuses operator id: 41] +Output [3]: [c_customer_sk#23, sum#26, isEmpty#27] + +(80) HashAggregate [codegen id : 27] +Input [3]: [c_customer_sk#23, sum#26, isEmpty#27] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28] +Results [2]: [c_customer_sk#23, sum((cast(ss_quantity#20 as decimal(10,0)) * ss_sales_price#21))#28 AS ssales#29] + +(81) Filter [codegen id : 27] +Input [2]: [c_customer_sk#23, ssales#29] +Condition : (isnotnull(ssales#29) AND (cast(ssales#29 as decimal(38,8)) > (0.500000 * ReusedSubquery Subquery scalar-subquery#30, [id=#31]))) + +(82) Project [codegen id : 27] +Output [1]: [c_customer_sk#23] +Input [2]: [c_customer_sk#23, ssales#29] + +(83) Sort [codegen id : 27] +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin [codegen id : 35] +Left keys [1]: [ws_bill_customer_sk#45] +Right keys [1]: [c_customer_sk#23] +Join type: LeftSemi +Join condition: None + +(85) ReusedExchange [Reuses operator id: 58] +Output [3]: [c_customer_sk#49, c_first_name#50, c_last_name#51] + +(86) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ws_bill_customer_sk#45] +Right keys [1]: [c_customer_sk#49] +Join type: Inner +Join condition: None + +(87) Project [codegen id : 35] +Output [5]: [ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48, c_first_name#50, c_last_name#51] +Input [7]: [ws_bill_customer_sk#45, ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48, c_customer_sk#49, c_first_name#50, c_last_name#51] + +(88) ReusedExchange [Reuses operator id: 65] +Output [1]: [d_date_sk#52] + +(89) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ws_sold_date_sk#48] +Right keys [1]: [d_date_sk#52] +Join type: Inner +Join condition: None + +(90) Project [codegen id : 35] +Output [4]: [ws_quantity#46, ws_list_price#47, c_first_name#50, c_last_name#51] +Input [6]: [ws_quantity#46, ws_list_price#47, ws_sold_date_sk#48, c_first_name#50, c_last_name#51, d_date_sk#52] + +(91) HashAggregate [codegen id : 35] +Input [4]: [ws_quantity#46, ws_list_price#47, c_first_name#50, c_last_name#51] +Keys [2]: [c_last_name#51, c_first_name#50] +Functions [1]: [partial_sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))] +Aggregate Attributes [2]: [sum#53, isEmpty#54] +Results [4]: [c_last_name#51, c_first_name#50, sum#55, isEmpty#56] + +(92) Exchange +Input [4]: [c_last_name#51, c_first_name#50, sum#55, isEmpty#56] +Arguments: hashpartitioning(c_last_name#51, c_first_name#50, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(93) HashAggregate [codegen id : 36] +Input [4]: [c_last_name#51, c_first_name#50, sum#55, isEmpty#56] +Keys [2]: [c_last_name#51, c_first_name#50] +Functions [1]: [sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))] +Aggregate Attributes [1]: [sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))#57] +Results [3]: [c_last_name#51, c_first_name#50, sum((cast(ws_quantity#46 as decimal(10,0)) * ws_list_price#47))#57 AS sales#58] + +(94) Union + +(95) TakeOrderedAndProject +Input [3]: [c_last_name#34, c_first_name#33, sales#43] +Arguments: 100, [c_last_name#34 ASC NULLS FIRST, c_first_name#33 ASC NULLS FIRST, sales#43 ASC NULLS FIRST], [c_last_name#34, c_first_name#33, sales#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 43 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (114) ++- Exchange (113) + +- * HashAggregate (112) + +- * HashAggregate (111) + +- Exchange (110) + +- * HashAggregate (109) + +- * Project (108) + +- * BroadcastHashJoin Inner BuildRight (107) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * Filter (98) + : : +- * ColumnarToRow (97) + : : +- Scan parquet spark_catalog.default.store_sales (96) + : +- ReusedExchange (99) + +- BroadcastExchange (106) + +- * Project (105) + +- * Filter (104) + +- * ColumnarToRow (103) + +- Scan parquet spark_catalog.default.date_dim (102) + + +(96) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#62)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 3] +Input [4]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62] + +(98) Filter [codegen id : 3] +Input [4]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62] +Condition : isnotnull(ss_customer_sk#59) + +(99) ReusedExchange [Reuses operator id: 37] +Output [1]: [c_customer_sk#63] + +(100) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#59] +Right keys [1]: [c_customer_sk#63] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 3] +Output [4]: [ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62, c_customer_sk#63] +Input [5]: [ss_customer_sk#59, ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62, c_customer_sk#63] + +(102) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#64, d_year#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#64, d_year#65] + +(104) Filter [codegen id : 2] +Input [2]: [d_date_sk#64, d_year#65] +Condition : (d_year#65 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#64)) + +(105) Project [codegen id : 2] +Output [1]: [d_date_sk#64] +Input [2]: [d_date_sk#64, d_year#65] + +(106) BroadcastExchange +Input [1]: [d_date_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] + +(107) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#62] +Right keys [1]: [d_date_sk#64] +Join type: Inner +Join condition: None + +(108) Project [codegen id : 3] +Output [3]: [ss_quantity#60, ss_sales_price#61, c_customer_sk#63] +Input [5]: [ss_quantity#60, ss_sales_price#61, ss_sold_date_sk#62, c_customer_sk#63, d_date_sk#64] + +(109) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#60, ss_sales_price#61, c_customer_sk#63] +Keys [1]: [c_customer_sk#63] +Functions [1]: [partial_sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))] +Aggregate Attributes [2]: [sum#66, isEmpty#67] +Results [3]: [c_customer_sk#63, sum#68, isEmpty#69] + +(110) Exchange +Input [3]: [c_customer_sk#63, sum#68, isEmpty#69] +Arguments: hashpartitioning(c_customer_sk#63, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(111) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#63, sum#68, isEmpty#69] +Keys [1]: [c_customer_sk#63] +Functions [1]: [sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))] +Aggregate Attributes [1]: [sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))#70] +Results [1]: [sum((cast(ss_quantity#60 as decimal(10,0)) * ss_sales_price#61))#70 AS csales#71] + +(112) HashAggregate [codegen id : 4] +Input [1]: [csales#71] +Keys: [] +Functions [1]: [partial_max(csales#71)] +Aggregate Attributes [1]: [max#72] +Results [1]: [max#73] + +(113) Exchange +Input [1]: [max#73] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(114) HashAggregate [codegen id : 5] +Input [1]: [max#73] +Keys: [] +Functions [1]: [max(csales#71)] +Aggregate Attributes [1]: [max(csales#71)#74] +Results [1]: [max(csales#71)#74 AS tpcds_cmax#75] + +Subquery:2 Hosting operator id = 54 Hosting Expression = ReusedSubquery Subquery scalar-subquery#30, [id=#31] + +Subquery:3 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#30, [id=#31] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8099e5fea6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.native_iceberg_compat/simplified.txt @@ -0,0 +1,181 @@ +TakeOrderedAndProject [c_last_name,c_first_name,sales] + Union + WholeStageCodegen (18) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),sales,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name] #1 + WholeStageCodegen (17) + HashAggregate [c_last_name,c_first_name,cs_quantity,cs_list_price] [sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_list_price,c_first_name,c_last_name] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,c_first_name,c_last_name] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (6) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (5) + Project [cs_bill_customer_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [item_sk] + Filter [cnt] + HashAggregate [_groupingexpression,i_item_sk,d_date,count] [count(1),item_sk,cnt,count] + InputAdapter + Exchange [_groupingexpression,i_item_sk,d_date] #4 + WholeStageCodegen (3) + HashAggregate [_groupingexpression,i_item_sk,d_date] [count,count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + WholeStageCodegen (9) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_sales_price,ss_sold_date_sk,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (8) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + SortMergeJoin [c_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #13 + WholeStageCodegen (10) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (36) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),sales,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name] #15 + WholeStageCodegen (35) + HashAggregate [c_last_name,c_first_name,ws_quantity,ws_list_price] [sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_list_price,c_first_name,c_last_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,c_first_name,c_last_name] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (24) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #16 + WholeStageCodegen (23) + Project [ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [item_sk] #3 + InputAdapter + WholeStageCodegen (27) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [ssales] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum((cast(ss_quantity as decimal(10,0)) * ss_sales_price)),ssales,sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #12 + InputAdapter + ReusedExchange [d_date_sk] #14 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/explain.txt new file mode 100644 index 0000000000..454a150d12 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/explain.txt @@ -0,0 +1,427 @@ +== Physical Plan == +* Filter (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Sort (12) + : : : : +- Exchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.store_returns (7) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store (15) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.item (22) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.customer (28) + +- BroadcastExchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.customer_address (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(5) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(9) Filter [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(10) Project [codegen id : 3] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(11) Exchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(14) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(15) Scan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(17) Filter [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(18) Project [codegen id : 5] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(19) BroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(22) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(25) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(28) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) Filter [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(31) BroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(34) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(36) Filter [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(37) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(40) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(41) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(43) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(44) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(46) Filter [codegen id : 11] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (73) ++- Exchange (72) + +- * HashAggregate (71) + +- * HashAggregate (70) + +- Exchange (69) + +- * HashAggregate (68) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin Inner BuildRight (63) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Project (52) + : : : : +- * SortMergeJoin Inner (51) + : : : : :- * Sort (48) + : : : : : +- ReusedExchange (47) + : : : : +- * Sort (50) + : : : : +- ReusedExchange (49) + : : : +- ReusedExchange (53) + : : +- BroadcastExchange (59) + : : +- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.item (56) + : +- ReusedExchange (62) + +- ReusedExchange (65) + + +(47) ReusedExchange [Reuses operator id: 5] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(48) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(49) ReusedExchange [Reuses operator id: 11] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] + +(50) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(51) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(53) ReusedExchange [Reuses operator id: 19] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(54) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(56) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(58) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(59) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(60) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(62) ReusedExchange [Reuses operator id: 31] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(63) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(65) ReusedExchange [Reuses operator id: 37] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(66) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(68) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] + +(69) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(70) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(71) HashAggregate [codegen id : 10] +Input [1]: [netpaid#31] +Keys: [] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] + +(72) Exchange +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 11] +Input [2]: [sum#44, count#45] +Keys: [] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [(0.05 * avg(netpaid#31)#46) AS (0.05 * avg(netpaid))#47] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bd820d7de7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_datafusion/simplified.txt @@ -0,0 +1,118 @@ +WholeStageCodegen (11) + Filter [paid] + Subquery #1 + WholeStageCodegen (11) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (10) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #3 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #4 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..454a150d12 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/explain.txt @@ -0,0 +1,427 @@ +== Physical Plan == +* Filter (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Sort (12) + : : : : +- Exchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.store_returns (7) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store (15) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.item (22) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.customer (28) + +- BroadcastExchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.customer_address (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(5) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(9) Filter [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(10) Project [codegen id : 3] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(11) Exchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(14) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(15) Scan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(17) Filter [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(18) Project [codegen id : 5] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(19) BroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(22) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(25) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(28) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) Filter [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(31) BroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(34) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(36) Filter [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(37) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(40) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(41) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(43) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(44) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(46) Filter [codegen id : 11] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (73) ++- Exchange (72) + +- * HashAggregate (71) + +- * HashAggregate (70) + +- Exchange (69) + +- * HashAggregate (68) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin Inner BuildRight (63) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Project (52) + : : : : +- * SortMergeJoin Inner (51) + : : : : :- * Sort (48) + : : : : : +- ReusedExchange (47) + : : : : +- * Sort (50) + : : : : +- ReusedExchange (49) + : : : +- ReusedExchange (53) + : : +- BroadcastExchange (59) + : : +- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.item (56) + : +- ReusedExchange (62) + +- ReusedExchange (65) + + +(47) ReusedExchange [Reuses operator id: 5] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(48) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(49) ReusedExchange [Reuses operator id: 11] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] + +(50) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(51) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(53) ReusedExchange [Reuses operator id: 19] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(54) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(56) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(58) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(59) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(60) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(62) ReusedExchange [Reuses operator id: 31] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(63) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(65) ReusedExchange [Reuses operator id: 37] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(66) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(68) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] + +(69) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(70) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(71) HashAggregate [codegen id : 10] +Input [1]: [netpaid#31] +Keys: [] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] + +(72) Exchange +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 11] +Input [2]: [sum#44, count#45] +Keys: [] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [(0.05 * avg(netpaid#31)#46) AS (0.05 * avg(netpaid))#47] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bd820d7de7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.native_iceberg_compat/simplified.txt @@ -0,0 +1,118 @@ +WholeStageCodegen (11) + Filter [paid] + Subquery #1 + WholeStageCodegen (11) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (10) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #3 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #4 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/explain.txt new file mode 100644 index 0000000000..36e95dab7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/explain.txt @@ -0,0 +1,427 @@ +== Physical Plan == +* Filter (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Sort (12) + : : : : +- Exchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.store_returns (7) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store (15) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.item (22) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.customer (28) + +- BroadcastExchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.customer_address (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(5) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(9) Filter [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(10) Project [codegen id : 3] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(11) Exchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(14) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(15) Scan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(17) Filter [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(18) Project [codegen id : 5] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(19) BroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(22) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon )) AND isnotnull(i_item_sk#15)) + +(25) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(28) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) Filter [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(31) BroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(34) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(36) Filter [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(37) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(40) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(41) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(43) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(44) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(46) Filter [codegen id : 11] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (73) ++- Exchange (72) + +- * HashAggregate (71) + +- * HashAggregate (70) + +- Exchange (69) + +- * HashAggregate (68) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin Inner BuildRight (63) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Project (52) + : : : : +- * SortMergeJoin Inner (51) + : : : : :- * Sort (48) + : : : : : +- ReusedExchange (47) + : : : : +- * Sort (50) + : : : : +- ReusedExchange (49) + : : : +- ReusedExchange (53) + : : +- BroadcastExchange (59) + : : +- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.item (56) + : +- ReusedExchange (62) + +- ReusedExchange (65) + + +(47) ReusedExchange [Reuses operator id: 5] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(48) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(49) ReusedExchange [Reuses operator id: 11] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] + +(50) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(51) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(53) ReusedExchange [Reuses operator id: 19] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(54) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(56) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(58) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(59) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(60) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(62) ReusedExchange [Reuses operator id: 31] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(63) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(65) ReusedExchange [Reuses operator id: 37] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(66) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(68) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] + +(69) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(70) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(71) HashAggregate [codegen id : 10] +Input [1]: [netpaid#31] +Keys: [] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] + +(72) Exchange +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 11] +Input [2]: [sum#44, count#45] +Keys: [] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [(0.05 * avg(netpaid#31)#46) AS (0.05 * avg(netpaid))#47] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bd820d7de7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_datafusion/simplified.txt @@ -0,0 +1,118 @@ +WholeStageCodegen (11) + Filter [paid] + Subquery #1 + WholeStageCodegen (11) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (10) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #3 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #4 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..36e95dab7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/explain.txt @@ -0,0 +1,427 @@ +== Physical Plan == +* Filter (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Sort (12) + : : : : +- Exchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.store_returns (7) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store (15) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.item (22) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.customer (28) + +- BroadcastExchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.customer_address (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(5) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(9) Filter [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(10) Project [codegen id : 3] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(11) Exchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(14) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(15) Scan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(17) Filter [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(18) Project [codegen id : 5] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(19) BroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(22) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon )) AND isnotnull(i_item_sk#15)) + +(25) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(28) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(30) Filter [codegen id : 7] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) + +(31) BroadcastExchange +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(34) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(36) Filter [codegen id : 8] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) + +(37) BroadcastExchange +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(40) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] + +(41) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(43) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] + +(44) Exchange +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] + +(46) Filter [codegen id : 11] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (73) ++- Exchange (72) + +- * HashAggregate (71) + +- * HashAggregate (70) + +- Exchange (69) + +- * HashAggregate (68) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin Inner BuildRight (63) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Project (52) + : : : : +- * SortMergeJoin Inner (51) + : : : : :- * Sort (48) + : : : : : +- ReusedExchange (47) + : : : : +- * Sort (50) + : : : : +- ReusedExchange (49) + : : : +- ReusedExchange (53) + : : +- BroadcastExchange (59) + : : +- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.item (56) + : +- ReusedExchange (62) + +- ReusedExchange (65) + + +(47) ReusedExchange [Reuses operator id: 5] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(48) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(49) ReusedExchange [Reuses operator id: 11] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] + +(50) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(51) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(53) ReusedExchange [Reuses operator id: 19] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(54) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(56) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(58) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(59) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(60) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(62) ReusedExchange [Reuses operator id: 31] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(63) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 9] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] + +(65) ReusedExchange [Reuses operator id: 37] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] + +(66) BroadcastHashJoin [codegen id : 9] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] + +(68) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] + +(69) Exchange +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(70) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] + +(71) HashAggregate [codegen id : 10] +Input [1]: [netpaid#31] +Keys: [] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] + +(72) Exchange +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 11] +Input [2]: [sum#44, count#45] +Keys: [] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [(0.05 * avg(netpaid#31)#46) AS (0.05 * avg(netpaid))#47] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bd820d7de7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.native_iceberg_compat/simplified.txt @@ -0,0 +1,118 @@ +WholeStageCodegen (11) + Filter [paid] + Subquery #1 + WholeStageCodegen (11) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen (10) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #10 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #3 + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #4 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name,c_birth_country] #7 + InputAdapter + ReusedExchange [ca_state,ca_zip,ca_country] #8 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #3 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #4 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/explain.txt new file mode 100644 index 0000000000..07f3e28387 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/explain.txt @@ -0,0 +1,279 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet spark_catalog.default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet spark_catalog.default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet spark_catalog.default.item (39) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] +Right keys [3]: [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(10) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(12) Filter [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#8, sr_item_sk#7] +Right keys [2]: [cs_bill_customer_sk#12, cs_item_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 4)) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(20) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#16] + +(23) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 4)) AND (d_moy#21 <= 10)) AND (d_year#20 = 2001)) AND isnotnull(d_date_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(27) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#11] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#19] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#22] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#22] + +(33) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Condition : isnotnull(s_store_sk#23) + +(36) BroadcastExchange +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#23] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_sk#23, s_store_id#24, s_store_name#25] + +(39) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Condition : isnotnull(i_item_sk#26) + +(42) BroadcastExchange +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 8] +Output [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_sk#26, i_item_id#27, i_item_desc#28] + +(45) HashAggregate [codegen id : 8] +Input [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#10)), partial_sum(UnscaledValue(cs_net_profit#14))] +Aggregate Attributes [3]: [sum#29, sum#30, sum#31] +Results [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] + +(46) Exchange +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(47) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#10)), sum(UnscaledValue(cs_net_profit#14))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#35, sum(UnscaledValue(sr_net_loss#10))#36, sum(UnscaledValue(cs_net_profit#14))#37] +Results [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#35,17,2) AS store_sales_profit#38, MakeDecimal(sum(UnscaledValue(sr_net_loss#10))#36,17,2) AS store_returns_loss#39, MakeDecimal(sum(UnscaledValue(cs_net_profit#14))#37,17,2) AS catalog_sales_profit#40] + +(48) TakeOrderedAndProject +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#38, store_returns_loss#39, catalog_sales_profit#40] +Arguments: 100, [i_item_id#27 ASC NULLS FIRST, i_item_desc#28 ASC NULLS FIRST, s_store_id#24 ASC NULLS FIRST, s_store_name#25 ASC NULLS FIRST], [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#38, store_returns_loss#39, catalog_sales_profit#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7e012d5563 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit)),store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] + Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..07f3e28387 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/explain.txt @@ -0,0 +1,279 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet spark_catalog.default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet spark_catalog.default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet spark_catalog.default.item (39) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] +Right keys [3]: [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_net_loss#10, sr_returned_date_sk#11] + +(10) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(12) Filter [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#8, sr_item_sk#7] +Right keys [2]: [cs_bill_customer_sk#12, cs_item_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_net_loss#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_net_profit#14, cs_sold_date_sk#15] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 4)) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(20) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#16] + +(23) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 4)) AND (d_moy#21 <= 10)) AND (d_year#20 = 2001)) AND isnotnull(d_date_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(27) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#11] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, sr_returned_date_sk#11, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#19] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#22] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, cs_sold_date_sk#15, d_date_sk#22] + +(33) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Condition : isnotnull(s_store_sk#23) + +(36) BroadcastExchange +Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#23] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_sk#23, s_store_id#24, s_store_name#25] + +(39) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Condition : isnotnull(i_item_sk#26) + +(42) BroadcastExchange +Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 8] +Output [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_sk#26, i_item_id#27, i_item_desc#28] + +(45) HashAggregate [codegen id : 8] +Input [7]: [ss_net_profit#5, sr_net_loss#10, cs_net_profit#14, s_store_id#24, s_store_name#25, i_item_id#27, i_item_desc#28] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#10)), partial_sum(UnscaledValue(cs_net_profit#14))] +Aggregate Attributes [3]: [sum#29, sum#30, sum#31] +Results [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] + +(46) Exchange +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(47) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, sum#32, sum#33, sum#34] +Keys [4]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#10)), sum(UnscaledValue(cs_net_profit#14))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#35, sum(UnscaledValue(sr_net_loss#10))#36, sum(UnscaledValue(cs_net_profit#14))#37] +Results [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#35,17,2) AS store_sales_profit#38, MakeDecimal(sum(UnscaledValue(sr_net_loss#10))#36,17,2) AS store_returns_loss#39, MakeDecimal(sum(UnscaledValue(cs_net_profit#14))#37,17,2) AS catalog_sales_profit#40] + +(48) TakeOrderedAndProject +Input [7]: [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#38, store_returns_loss#39, catalog_sales_profit#40] +Arguments: 100, [i_item_id#27 ASC NULLS FIRST, i_item_desc#28 ASC NULLS FIRST, s_store_id#24 ASC NULLS FIRST, s_store_name#25 ASC NULLS FIRST], [i_item_id#27, i_item_desc#28, s_store_id#24, s_store_name#25, store_sales_profit#38, store_returns_loss#39, catalog_sales_profit#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7e012d5563 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit)),store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] + Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_net_loss,sr_returned_date_sk,cs_net_profit,cs_sold_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/explain.txt new file mode 100644 index 0000000000..718aa301fe --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.promotion (24) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_bill_cdemo_sk#1) AND isnotnull(cs_item_sk#2)) AND isnotnull(cs_promo_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_cdemo_sk#1] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(21) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#15, i_item_id#16] + +(24) Scan parquet spark_catalog.default.promotion +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_promo_sk#3] +Right keys [1]: [p_promo_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16, p_promo_sk#17] + +(31) HashAggregate [codegen id : 5] +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] + +(32) Exchange +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [4]: [avg(cs_quantity#4)#36, avg(UnscaledValue(cs_list_price#5))#37, avg(UnscaledValue(cs_coupon_amt#7))#38, avg(UnscaledValue(cs_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(cs_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(cs_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(cs_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(cs_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/simplified.txt new file mode 100644 index 0000000000..04314c0c07 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cs_quantity),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..718aa301fe --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.promotion (24) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Condition : ((isnotnull(cs_bill_cdemo_sk#1) AND isnotnull(cs_item_sk#2)) AND isnotnull(cs_promo_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_cdemo_sk#1] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] +Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(21) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#15, i_item_id#16] + +(24) Scan parquet spark_catalog.default.promotion +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_promo_sk#3] +Right keys [1]: [p_promo_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16, p_promo_sk#17] + +(31) HashAggregate [codegen id : 5] +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] + +(32) Exchange +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [4]: [avg(cs_quantity#4)#36, avg(UnscaledValue(cs_list_price#5))#37, avg(UnscaledValue(cs_coupon_amt#7))#38, avg(UnscaledValue(cs_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(cs_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(cs_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(cs_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(cs_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..04314c0c07 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cs_quantity),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/explain.txt new file mode 100644 index 0000000000..9f3ff1c51e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Expand (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.store (18) + +- BroadcastExchange (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.item (24) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] + +(24) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(27) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] + +(30) Expand [codegen id : 5] +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [8]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Results [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] + +(32) Exchange +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 6] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [4]: [avg(ss_quantity#4)#38, avg(UnscaledValue(ss_list_price#5))#39, avg(UnscaledValue(ss_coupon_amt#7))#40, avg(UnscaledValue(ss_sales_price#6))#41] +Results [7]: [i_item_id#19, s_state#20, cast((shiftright(spark_grouping_id#21, 0) & 1) as tinyint) AS g_state#42, avg(ss_quantity#4)#38 AS agg1#43, cast((avg(UnscaledValue(ss_list_price#5))#39 / 100.0) as decimal(11,6)) AS agg2#44, cast((avg(UnscaledValue(ss_coupon_amt#7))#40 / 100.0) as decimal(11,6)) AS agg3#45, cast((avg(UnscaledValue(ss_sales_price#6))#41 / 100.0) as decimal(11,6)) AS agg4#46] + +(34) TakeOrderedAndProject +Input [7]: [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d61e190b9f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count] [avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,s_state,spark_grouping_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Expand [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9f3ff1c51e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Expand (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.store (18) + +- BroadcastExchange (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.item (24) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] + +(24) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(27) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] + +(30) Expand [codegen id : 5] +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [8]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Results [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] + +(32) Exchange +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 6] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [4]: [avg(ss_quantity#4)#38, avg(UnscaledValue(ss_list_price#5))#39, avg(UnscaledValue(ss_coupon_amt#7))#40, avg(UnscaledValue(ss_sales_price#6))#41] +Results [7]: [i_item_id#19, s_state#20, cast((shiftright(spark_grouping_id#21, 0) & 1) as tinyint) AS g_state#42, avg(ss_quantity#4)#38 AS agg1#43, cast((avg(UnscaledValue(ss_list_price#5))#39 / 100.0) as decimal(11,6)) AS agg2#44, cast((avg(UnscaledValue(ss_coupon_amt#7))#40 / 100.0) as decimal(11,6)) AS agg3#45, cast((avg(UnscaledValue(ss_sales_price#6))#41 / 100.0) as decimal(11,6)) AS agg4#46] + +(34) TakeOrderedAndProject +Input [7]: [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d61e190b9f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count] [avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,s_state,spark_grouping_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Expand [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/explain.txt new file mode 100644 index 0000000000..bfc684b148 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (70) +:- * BroadcastNestedLoopJoin Inner BuildRight (58) +: :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * Project (4) +: : : : : +- * Filter (3) +: : : : : +- * ColumnarToRow (2) +: : : : : +- Scan parquet spark_catalog.default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * Project (14) +: : : : +- * Filter (13) +: : : : +- * ColumnarToRow (12) +: : : : +- Scan parquet spark_catalog.default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * Project (26) +: : : +- * Filter (25) +: : : +- * ColumnarToRow (24) +: : : +- Scan parquet spark_catalog.default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * Project (38) +: : +- * Filter (37) +: : +- * ColumnarToRow (36) +: : +- Scan parquet spark_catalog.default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * Project (50) +: +- * Filter (49) +: +- * ColumnarToRow (48) +: +- Scan parquet spark_catalog.default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet spark_catalog.default.store_sales (59) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 1] +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) + +(4) Project [codegen id : 1] +Output [1]: [ss_list_price#3] +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] + +(5) HashAggregate [codegen id : 1] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7] +Results [4]: [ss_list_price#3, sum#8, count#9, count#10] + +(6) Exchange +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(7) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7] +Results [4]: [ss_list_price#3, sum#8, count#9, count#10] + +(8) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [4]: [sum#8, count#9, count#10, count#12] + +(9) Exchange +Input [4]: [sum#8, count#9, count#10, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) HashAggregate [codegen id : 18] +Input [4]: [sum#8, count#9, count#10, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#7 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] + +(11) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] + +(13) Filter [codegen id : 3] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) + +(14) Project [codegen id : 3] +Output [1]: [ss_list_price#18] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] + +(15) HashAggregate [codegen id : 3] +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] + +(16) Exchange +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(17) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] + +(18) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [4]: [sum#23, count#24, count#25, count#27] + +(19) Exchange +Input [4]: [sum#23, count#24, count#25, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(20) HashAggregate [codegen id : 5] +Input [4]: [sum#23, count#24, count#25, count#27] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#21 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#22 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] + +(21) BroadcastExchange +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(22) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(23) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] + +(25) Filter [codegen id : 6] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) + +(26) Project [codegen id : 6] +Output [1]: [ss_list_price#33] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] + +(27) HashAggregate [codegen id : 6] +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] + +(28) Exchange +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(29) HashAggregate [codegen id : 7] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] + +(30) HashAggregate [codegen id : 7] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [4]: [sum#38, count#39, count#40, count#42] + +(31) Exchange +Input [4]: [sum#38, count#39, count#40, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(32) HashAggregate [codegen id : 8] +Input [4]: [sum#38, count#39, count#40, count#42] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#36 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#37 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] + +(33) BroadcastExchange +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] + +(34) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(35) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 9] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] + +(37) Filter [codegen id : 9] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) + +(38) Project [codegen id : 9] +Output [1]: [ss_list_price#48] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] + +(39) HashAggregate [codegen id : 9] +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] + +(40) Exchange +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(41) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] + +(42) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [4]: [sum#53, count#54, count#55, count#57] + +(43) Exchange +Input [4]: [sum#53, count#54, count#55, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(44) HashAggregate [codegen id : 11] +Input [4]: [sum#53, count#54, count#55, count#57] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#51 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#52 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] + +(45) BroadcastExchange +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(46) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(47) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 12] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] + +(49) Filter [codegen id : 12] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) + +(50) Project [codegen id : 12] +Output [1]: [ss_list_price#63] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] + +(51) HashAggregate [codegen id : 12] +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] + +(52) Exchange +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(53) HashAggregate [codegen id : 13] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] + +(54) HashAggregate [codegen id : 13] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [4]: [sum#68, count#69, count#70, count#72] + +(55) Exchange +Input [4]: [sum#68, count#69, count#70, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(56) HashAggregate [codegen id : 14] +Input [4]: [sum#68, count#69, count#70, count#72] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#66 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#67 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] + +(57) BroadcastExchange +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] + +(58) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(59) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 15] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] + +(61) Filter [codegen id : 15] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) + +(62) Project [codegen id : 15] +Output [1]: [ss_list_price#78] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] + +(63) HashAggregate [codegen id : 15] +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] + +(64) Exchange +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(65) HashAggregate [codegen id : 16] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] + +(66) HashAggregate [codegen id : 16] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [4]: [sum#83, count#84, count#85, count#87] + +(67) Exchange +Input [4]: [sum#83, count#84, count#85, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(68) HashAggregate [codegen id : 17] +Input [4]: [sum#83, count#84, count#85, count#87] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#81 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#82 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] + +(69) BroadcastExchange +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] + +(70) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/simplified.txt new file mode 100644 index 0000000000..25f9d020f5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_datafusion/simplified.txt @@ -0,0 +1,111 @@ +WholeStageCodegen (18) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (5) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + InputAdapter + Exchange #4 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #5 + WholeStageCodegen (3) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + InputAdapter + Exchange #7 + WholeStageCodegen (7) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #8 + WholeStageCodegen (6) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + InputAdapter + Exchange #10 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #11 + WholeStageCodegen (9) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (14) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + InputAdapter + Exchange #13 + WholeStageCodegen (13) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #14 + WholeStageCodegen (12) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (17) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (15) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..bfc684b148 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (70) +:- * BroadcastNestedLoopJoin Inner BuildRight (58) +: :- * BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * Project (4) +: : : : : +- * Filter (3) +: : : : : +- * ColumnarToRow (2) +: : : : : +- Scan parquet spark_catalog.default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * Project (14) +: : : : +- * Filter (13) +: : : : +- * ColumnarToRow (12) +: : : : +- Scan parquet spark_catalog.default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * Project (26) +: : : +- * Filter (25) +: : : +- * ColumnarToRow (24) +: : : +- Scan parquet spark_catalog.default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * Project (38) +: : +- * Filter (37) +: : +- * ColumnarToRow (36) +: : +- Scan parquet spark_catalog.default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * Project (50) +: +- * Filter (49) +: +- * ColumnarToRow (48) +: +- Scan parquet spark_catalog.default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet spark_catalog.default.store_sales (59) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 1] +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) + +(4) Project [codegen id : 1] +Output [1]: [ss_list_price#3] +Input [5]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4, ss_sold_date_sk#5] + +(5) HashAggregate [codegen id : 1] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7] +Results [4]: [ss_list_price#3, sum#8, count#9, count#10] + +(6) Exchange +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(7) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7] +Results [4]: [ss_list_price#3, sum#8, count#9, count#10] + +(8) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#8, count#9, count#10] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [4]: [sum#8, count#9, count#10, count#12] + +(9) Exchange +Input [4]: [sum#8, count#9, count#10, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) HashAggregate [codegen id : 18] +Input [4]: [sum#8, count#9, count#10, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#7 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] + +(11) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] + +(13) Filter [codegen id : 3] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) + +(14) Project [codegen id : 3] +Output [1]: [ss_list_price#18] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] + +(15) HashAggregate [codegen id : 3] +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] + +(16) Exchange +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(17) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] + +(18) HashAggregate [codegen id : 4] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [4]: [sum#23, count#24, count#25, count#27] + +(19) Exchange +Input [4]: [sum#23, count#24, count#25, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(20) HashAggregate [codegen id : 5] +Input [4]: [sum#23, count#24, count#25, count#27] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#21 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#22 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] + +(21) BroadcastExchange +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] + +(22) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(23) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] + +(25) Filter [codegen id : 6] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) + +(26) Project [codegen id : 6] +Output [1]: [ss_list_price#33] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] + +(27) HashAggregate [codegen id : 6] +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] + +(28) Exchange +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(29) HashAggregate [codegen id : 7] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] + +(30) HashAggregate [codegen id : 7] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [4]: [sum#38, count#39, count#40, count#42] + +(31) Exchange +Input [4]: [sum#38, count#39, count#40, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(32) HashAggregate [codegen id : 8] +Input [4]: [sum#38, count#39, count#40, count#42] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#36 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#37 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] + +(33) BroadcastExchange +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] + +(34) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(35) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 9] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] + +(37) Filter [codegen id : 9] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) + +(38) Project [codegen id : 9] +Output [1]: [ss_list_price#48] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] + +(39) HashAggregate [codegen id : 9] +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] + +(40) Exchange +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(41) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] + +(42) HashAggregate [codegen id : 10] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [4]: [sum#53, count#54, count#55, count#57] + +(43) Exchange +Input [4]: [sum#53, count#54, count#55, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(44) HashAggregate [codegen id : 11] +Input [4]: [sum#53, count#54, count#55, count#57] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#51 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#52 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] + +(45) BroadcastExchange +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] + +(46) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(47) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 12] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] + +(49) Filter [codegen id : 12] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) + +(50) Project [codegen id : 12] +Output [1]: [ss_list_price#63] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] + +(51) HashAggregate [codegen id : 12] +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] + +(52) Exchange +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(53) HashAggregate [codegen id : 13] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] + +(54) HashAggregate [codegen id : 13] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [4]: [sum#68, count#69, count#70, count#72] + +(55) Exchange +Input [4]: [sum#68, count#69, count#70, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] + +(56) HashAggregate [codegen id : 14] +Input [4]: [sum#68, count#69, count#70, count#72] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#66 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#67 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] + +(57) BroadcastExchange +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] + +(58) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + +(59) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 15] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] + +(61) Filter [codegen id : 15] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) + +(62) Project [codegen id : 15] +Output [1]: [ss_list_price#78] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] + +(63) HashAggregate [codegen id : 15] +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] + +(64) Exchange +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(65) HashAggregate [codegen id : 16] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] + +(66) HashAggregate [codegen id : 16] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [4]: [sum#83, count#84, count#85, count#87] + +(67) Exchange +Input [4]: [sum#83, count#84, count#85, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] + +(68) HashAggregate [codegen id : 17] +Input [4]: [sum#83, count#84, count#85, count#87] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#81 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#82 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] + +(69) BroadcastExchange +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] + +(70) BroadcastNestedLoopJoin [codegen id : 18] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..25f9d020f5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.native_iceberg_compat/simplified.txt @@ -0,0 +1,111 @@ +WholeStageCodegen (18) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (5) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] + InputAdapter + Exchange #4 + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #5 + WholeStageCodegen (3) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + InputAdapter + Exchange #7 + WholeStageCodegen (7) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #8 + WholeStageCodegen (6) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + InputAdapter + Exchange #10 + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #11 + WholeStageCodegen (9) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (14) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + InputAdapter + Exchange #13 + WholeStageCodegen (13) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #14 + WholeStageCodegen (12) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (17) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (15) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/explain.txt new file mode 100644 index 0000000000..a94e9b7806 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildRight (47) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet spark_catalog.default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.date_dim (23) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.date_dim (30) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet spark_catalog.default.store (37) + +- BroadcastExchange (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet spark_catalog.default.item (43) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] +Right keys [3]: [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(10) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(12) Filter [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#8, sr_item_sk#7] +Right keys [2]: [cs_bill_customer_sk#12, cs_item_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 9)) AND (d_year#17 = 1999)) AND isnotnull(d_date_sk#16)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(20) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] + +(23) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 9)) AND (d_moy#21 <= 12)) AND (d_year#20 = 1999)) AND isnotnull(d_date_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(27) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#11] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#19] + +(30) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] +Condition : (d_year#23 IN (1999,2000,2001) AND isnotnull(d_date_sk#22)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#22] +Input [2]: [d_date_sk#22, d_year#23] + +(34) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#22] + +(37) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(39) Filter [codegen id : 6] +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Condition : isnotnull(s_store_sk#24) + +(40) BroadcastExchange +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#24] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#24, s_store_id#25, s_store_name#26] + +(43) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] + +(45) Filter [codegen id : 7] +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Condition : isnotnull(i_item_sk#27) + +(46) BroadcastExchange +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#27] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 8] +Output [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_sk#27, i_item_id#28, i_item_desc#29] + +(49) HashAggregate [codegen id : 8] +Input [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#10), partial_sum(cs_quantity#14)] +Aggregate Attributes [3]: [sum#30, sum#31, sum#32] +Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] + +(50) Exchange +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] +Arguments: hashpartitioning(i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#10), sum(cs_quantity#14)] +Aggregate Attributes [3]: [sum(ss_quantity#5)#36, sum(sr_return_quantity#10)#37, sum(cs_quantity#14)#38] +Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum(ss_quantity#5)#36 AS store_sales_quantity#39, sum(sr_return_quantity#10)#37 AS store_returns_quantity#40, sum(cs_quantity#14)#38 AS catalog_sales_quantity#41] + +(52) TakeOrderedAndProject +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#39, store_returns_quantity#40, catalog_sales_quantity#41] +Arguments: 100, [i_item_id#28 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, s_store_name#26 ASC NULLS FIRST], [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#39, store_returns_quantity#40, catalog_sales_quantity#41] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/simplified.txt new file mode 100644 index 0000000000..01f107ed0a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(ss_quantity),sum(sr_return_quantity),sum(cs_quantity),store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_quantity,sr_return_quantity,cs_quantity] [sum,sum,sum,sum,sum,sum] + Project [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..a94e9b7806 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildRight (47) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet spark_catalog.default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.date_dim (23) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.date_dim (30) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet spark_catalog.default.store (37) + +- BroadcastExchange (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet spark_catalog.default.item (43) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_customer_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_ticket_number#4)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#11)] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_customer_sk#8) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [5]: [sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] +Right keys [3]: [sr_customer_sk#8, sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(10) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(12) Filter [codegen id : 2] +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Condition : (isnotnull(cs_bill_customer_sk#12) AND isnotnull(cs_item_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#8, sr_item_sk#7] +Right keys [2]: [cs_bill_customer_sk#12, cs_item_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_return_quantity#10, sr_returned_date_sk#11, cs_bill_customer_sk#12, cs_item_sk#13, cs_quantity#14, cs_sold_date_sk#15] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 9)) AND (d_year#17 = 1999)) AND isnotnull(d_date_sk#16)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(20) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#16] + +(23) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : (((((isnotnull(d_moy#21) AND isnotnull(d_year#20)) AND (d_moy#21 >= 9)) AND (d_moy#21 <= 12)) AND (d_year#20 = 1999)) AND isnotnull(d_date_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] + +(27) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#11] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, sr_returned_date_sk#11, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#19] + +(30) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] +Condition : (d_year#23 IN (1999,2000,2001) AND isnotnull(d_date_sk#22)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#22] +Input [2]: [d_date_sk#22, d_year#23] + +(34) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, cs_sold_date_sk#15, d_date_sk#22] + +(37) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] + +(39) Filter [codegen id : 6] +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Condition : isnotnull(s_store_sk#24) + +(40) BroadcastExchange +Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#24] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_sk#24, s_store_id#25, s_store_name#26] + +(43) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] + +(45) Filter [codegen id : 7] +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Condition : isnotnull(i_item_sk#27) + +(46) BroadcastExchange +Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#27] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 8] +Output [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_sk#27, i_item_id#28, i_item_desc#29] + +(49) HashAggregate [codegen id : 8] +Input [7]: [ss_quantity#5, sr_return_quantity#10, cs_quantity#14, s_store_id#25, s_store_name#26, i_item_id#28, i_item_desc#29] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#10), partial_sum(cs_quantity#14)] +Aggregate Attributes [3]: [sum#30, sum#31, sum#32] +Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] + +(50) Exchange +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] +Arguments: hashpartitioning(i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum#33, sum#34, sum#35] +Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#10), sum(cs_quantity#14)] +Aggregate Attributes [3]: [sum(ss_quantity#5)#36, sum(sr_return_quantity#10)#37, sum(cs_quantity#14)#38] +Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, sum(ss_quantity#5)#36 AS store_sales_quantity#39, sum(sr_return_quantity#10)#37 AS store_returns_quantity#40, sum(cs_quantity#14)#38 AS catalog_sales_quantity#41] + +(52) TakeOrderedAndProject +Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#39, store_returns_quantity#40, catalog_sales_quantity#41] +Arguments: 100, [i_item_id#28 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, s_store_name#26 ASC NULLS FIRST], [i_item_id#28, i_item_desc#29, s_store_id#25, s_store_name#26, store_sales_quantity#39, store_returns_quantity#40, catalog_sales_quantity#41] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..01f107ed0a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(ss_quantity),sum(sr_return_quantity),sum(cs_quantity),store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_quantity,sr_return_quantity,cs_quantity] [sum,sum,sum,sum,sum,sum] + Project [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,sr_returned_date_sk,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sold_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/explain.txt new file mode 100644 index 0000000000..e05061d827 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 128)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] + +(19) Exchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum_agg#16] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, sum_agg#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/simplified.txt new file mode 100644 index 0000000000..686e941c28 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,sum_agg,sum] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e05061d827 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 128)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] + +(19) Exchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum_agg#16] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, sum_agg#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..686e941c28 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,sum_agg,sum] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/explain.txt new file mode 100644 index 0000000000..f272556a95 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/explain.txt @@ -0,0 +1,312 @@ +== Physical Plan == +TakeOrderedAndProject (53) ++- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet spark_catalog.default.web_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet spark_catalog.default.customer (40) + +- BroadcastExchange (50) + +- * Project (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet spark_catalog.default.customer_address (46) + + +(1) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#4)] +PushedFilters: [IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : (isnotnull(wr_returning_addr_sk#2) AND isnotnull(wr_returning_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] + +(18) Exchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 11] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [3]: [wr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(21) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#4)] +PushedFilters: [IsNotNull(wr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] + +(23) Filter [codegen id : 6] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : isnotnull(wr_returning_addr_sk#2) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#7, ca_state#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] + +(30) HashAggregate [codegen id : 6] +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum#15] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] + +(31) Exchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(32) HashAggregate [codegen id : 7] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [2]: [ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#13, ctr_total_return#14] +Keys [1]: [ctr_state#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [ctr_state#13, sum#19, count#20] + +(34) Exchange +Input [3]: [ctr_state#13, sum#19, count#20] +Arguments: hashpartitioning(ctr_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#13, sum#19, count#20] +Keys [1]: [ctr_state#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#21] +Results [2]: [(avg(ctr_total_return#14)#21 * 1.2) AS (avg(ctr_total_return) * 1.2)#22, ctr_state#13 AS ctr_state#13#23] + +(36) Filter [codegen id : 8] +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#22) + +(37) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#13#23] +Join type: Inner +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#22) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] + +(40) Scan parquet spark_catalog.default.customer +Output [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] + +(42) Filter [codegen id : 9] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#26)) + +(43) BroadcastExchange +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#24] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 11] +Output [14]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Input [16]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] + +(46) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#38, ca_state#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [2]: [ca_address_sk#38, ca_state#39] + +(48) Filter [codegen id : 10] +Input [2]: [ca_address_sk#38, ca_state#39] +Condition : ((isnotnull(ca_state#39) AND (ca_state#39 = GA)) AND isnotnull(ca_address_sk#38)) + +(49) Project [codegen id : 10] +Output [1]: [ca_address_sk#38] +Input [2]: [ca_address_sk#38, ca_state#39] + +(50) BroadcastExchange +Input [1]: [ca_address_sk#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(51) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#26] +Right keys [1]: [ca_address_sk#38] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 11] +Output [13]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] +Input [15]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ca_address_sk#38] + +(53) TakeOrderedAndProject +Input [13]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST, c_salutation#27 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, c_last_name#29 ASC NULLS FIRST, c_preferred_cust_flag#30 ASC NULLS FIRST, c_birth_day#31 ASC NULLS FIRST, c_birth_month#32 ASC NULLS FIRST, c_birth_year#33 ASC NULLS FIRST, c_birth_country#34 ASC NULLS FIRST, c_login#35 ASC NULLS FIRST, c_email_address#36 ASC NULLS FIRST, c_last_review_date#37 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8cd53281f4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_datafusion/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (3) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returning_addr_sk,wr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_state,sum,count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (6) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f272556a95 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/explain.txt @@ -0,0 +1,312 @@ +== Physical Plan == +TakeOrderedAndProject (53) ++- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet spark_catalog.default.web_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet spark_catalog.default.customer (40) + +- BroadcastExchange (50) + +- * Project (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet spark_catalog.default.customer_address (46) + + +(1) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#4)] +PushedFilters: [IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : (isnotnull(wr_returning_addr_sk#2) AND isnotnull(wr_returning_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] + +(18) Exchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 11] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [3]: [wr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(21) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#4)] +PushedFilters: [IsNotNull(wr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] + +(23) Filter [codegen id : 6] +Input [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] +Condition : isnotnull(wr_returning_addr_sk#2) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#7, ca_state#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, ca_address_sk#7, ca_state#8] + +(30) HashAggregate [codegen id : 6] +Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum#15] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] + +(31) Exchange +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(32) HashAggregate [codegen id : 7] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] +Keys [2]: [wr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [2]: [ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#13, ctr_total_return#14] +Keys [1]: [ctr_state#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [ctr_state#13, sum#19, count#20] + +(34) Exchange +Input [3]: [ctr_state#13, sum#19, count#20] +Arguments: hashpartitioning(ctr_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#13, sum#19, count#20] +Keys [1]: [ctr_state#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#21] +Results [2]: [(avg(ctr_total_return#14)#21 * 1.2) AS (avg(ctr_total_return) * 1.2)#22, ctr_state#13 AS ctr_state#13#23] + +(36) Filter [codegen id : 8] +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#22) + +(37) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#13#23] +Join type: Inner +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#22) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] + +(40) Scan parquet spark_catalog.default.customer +Output [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] + +(42) Filter [codegen id : 9] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#26)) + +(43) BroadcastExchange +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#24] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 11] +Output [14]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Input [16]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] + +(46) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#38, ca_state#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [2]: [ca_address_sk#38, ca_state#39] + +(48) Filter [codegen id : 10] +Input [2]: [ca_address_sk#38, ca_state#39] +Condition : ((isnotnull(ca_state#39) AND (ca_state#39 = GA)) AND isnotnull(ca_address_sk#38)) + +(49) Project [codegen id : 10] +Output [1]: [ca_address_sk#38] +Input [2]: [ca_address_sk#38, ca_state#39] + +(50) BroadcastExchange +Input [1]: [ca_address_sk#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(51) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#26] +Right keys [1]: [ca_address_sk#38] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 11] +Output [13]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] +Input [15]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ca_address_sk#38] + +(53) TakeOrderedAndProject +Input [13]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST, c_salutation#27 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, c_last_name#29 ASC NULLS FIRST, c_preferred_cust_flag#30 ASC NULLS FIRST, c_birth_day#31 ASC NULLS FIRST, c_birth_month#32 ASC NULLS FIRST, c_birth_year#33 ASC NULLS FIRST, c_birth_country#34 ASC NULLS FIRST, c_login#35 ASC NULLS FIRST, c_email_address#36 ASC NULLS FIRST, c_last_review_date#37 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8cd53281f4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.native_iceberg_compat/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (3) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returning_addr_sk,wr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_state,sum,count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (6) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/explain.txt new file mode 100644 index 0000000000..12ca5d428b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/explain.txt @@ -0,0 +1,586 @@ +== Physical Plan == +* Sort (99) ++- Exchange (98) + +- * Project (97) + +- * BroadcastHashJoin Inner BuildRight (96) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin Inner BuildRight (67) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.customer_address (10) + : : : : +- BroadcastExchange (34) + : : : : +- * HashAggregate (33) + : : : : +- Exchange (32) + : : : : +- * HashAggregate (31) + : : : : +- * Project (30) + : : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : : :- * Project (27) + : : : : : +- * BroadcastHashJoin Inner BuildRight (26) + : : : : : :- * Filter (21) + : : : : : : +- * ColumnarToRow (20) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (19) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (28) + : : : +- BroadcastExchange (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * Project (47) + : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : :- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet spark_catalog.default.store_sales (36) + : : : : +- BroadcastExchange (42) + : : : : +- * Filter (41) + : : : : +- * ColumnarToRow (40) + : : : : +- Scan parquet spark_catalog.default.date_dim (39) + : : : +- ReusedExchange (45) + : : +- BroadcastExchange (66) + : : +- * HashAggregate (65) + : : +- Exchange (64) + : : +- * HashAggregate (63) + : : +- * Project (62) + : : +- * BroadcastHashJoin Inner BuildRight (61) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : : +- ReusedExchange (57) + : : +- ReusedExchange (60) + : +- BroadcastExchange (80) + : +- * HashAggregate (79) + : +- Exchange (78) + : +- * HashAggregate (77) + : +- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Project (73) + : : +- * BroadcastHashJoin Inner BuildRight (72) + : : :- * Filter (70) + : : : +- * ColumnarToRow (69) + : : : +- Scan parquet spark_catalog.default.web_sales (68) + : : +- ReusedExchange (71) + : +- ReusedExchange (74) + +- BroadcastExchange (95) + +- * HashAggregate (94) + +- Exchange (93) + +- * HashAggregate (92) + +- * Project (91) + +- * BroadcastHashJoin Inner BuildRight (90) + :- * Project (88) + : +- * BroadcastHashJoin Inner BuildRight (87) + : :- * Filter (85) + : : +- * ColumnarToRow (84) + : : +- Scan parquet spark_catalog.default.web_sales (83) + : +- ReusedExchange (86) + +- ReusedExchange (89) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_addr_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_year#5, d_qoy#6] + +(10) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_county#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_county#8] + +(12) Filter [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_county#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_county#8)) + +(13) BroadcastExchange +Input [2]: [ca_address_sk#7, ca_county#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_addr_sk#1] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_address_sk#7, ca_county#8] + +(16) HashAggregate [codegen id : 3] +Input [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#9] +Results [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] + +(17) Exchange +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] +Arguments: hashpartitioning(ca_county#8, d_qoy#6, d_year#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 24] +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#11] +Results [3]: [ca_county#8, d_year#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#11,17,2) AS store_sales#12] + +(19) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#15)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] + +(21) Filter [codegen id : 6] +Input [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_addr_sk#13) + +(22) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Condition : ((((isnotnull(d_qoy#18) AND isnotnull(d_year#17)) AND (d_qoy#18 = 2)) AND (d_year#17 = 2000)) AND isnotnull(d_date_sk#16)) + +(25) BroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18] +Input [6]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_year#17, d_qoy#18] + +(28) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#19, ca_county#20] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#13] +Right keys [1]: [ca_address_sk#19] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 6] +Output [4]: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] +Input [6]: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_address_sk#19, ca_county#20] + +(31) HashAggregate [codegen id : 6] +Input [4]: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] +Keys [3]: [ca_county#20, d_qoy#18, d_year#17] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] + +(32) Exchange +Input [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] +Arguments: hashpartitioning(ca_county#20, d_qoy#18, d_year#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 7] +Input [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] +Keys [3]: [ca_county#20, d_qoy#18, d_year#17] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#14))#11] +Results [2]: [ca_county#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#14))#11,17,2) AS store_sales#23] + +(34) BroadcastExchange +Input [2]: [ca_county#20, store_sales#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(35) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#8] +Right keys [1]: [ca_county#20] +Join type: Inner +Join condition: None + +(36) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 10] +Input [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(38) Filter [codegen id : 10] +Input [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_addr_sk#24) + +(39) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] + +(41) Filter [codegen id : 8] +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Condition : ((((isnotnull(d_qoy#29) AND isnotnull(d_year#28)) AND (d_qoy#29 = 3)) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(42) BroadcastExchange +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(43) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 10] +Output [4]: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29] +Input [6]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_year#28, d_qoy#29] + +(45) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#30, ca_county#31] + +(46) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_addr_sk#24] +Right keys [1]: [ca_address_sk#30] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 10] +Output [4]: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] +Input [6]: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_address_sk#30, ca_county#31] + +(48) HashAggregate [codegen id : 10] +Input [4]: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] +Keys [3]: [ca_county#31, d_qoy#29, d_year#28] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#25))] +Aggregate Attributes [1]: [sum#32] +Results [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] + +(49) Exchange +Input [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] +Arguments: hashpartitioning(ca_county#31, d_qoy#29, d_year#28, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 11] +Input [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] +Keys [3]: [ca_county#31, d_qoy#29, d_year#28] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#25))#11] +Results [2]: [ca_county#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#25))#11,17,2) AS store_sales#34] + +(51) BroadcastExchange +Input [2]: [ca_county#31, store_sales#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(52) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#20] +Right keys [1]: [ca_county#31] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 24] +Output [5]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34] +Input [7]: [ca_county#8, d_year#5, store_sales#12, ca_county#20, store_sales#23, ca_county#31, store_sales#34] + +(54) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#37)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] + +(56) Filter [codegen id : 14] +Input [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Condition : isnotnull(ws_bill_addr_sk#35) + +(57) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#38, d_year#39, d_qoy#40] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 14] +Output [4]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, d_year#39, d_qoy#40] +Input [6]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38, d_year#39, d_qoy#40] + +(60) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#41, ca_county#42] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_bill_addr_sk#35] +Right keys [1]: [ca_address_sk#41] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 14] +Output [4]: [ws_ext_sales_price#36, d_year#39, d_qoy#40, ca_county#42] +Input [6]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, d_year#39, d_qoy#40, ca_address_sk#41, ca_county#42] + +(63) HashAggregate [codegen id : 14] +Input [4]: [ws_ext_sales_price#36, d_year#39, d_qoy#40, ca_county#42] +Keys [3]: [ca_county#42, d_qoy#40, d_year#39] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum#43] +Results [4]: [ca_county#42, d_qoy#40, d_year#39, sum#44] + +(64) Exchange +Input [4]: [ca_county#42, d_qoy#40, d_year#39, sum#44] +Arguments: hashpartitioning(ca_county#42, d_qoy#40, d_year#39, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(65) HashAggregate [codegen id : 15] +Input [4]: [ca_county#42, d_qoy#40, d_year#39, sum#44] +Keys [3]: [ca_county#42, d_qoy#40, d_year#39] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] +Results [2]: [ca_county#42, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS web_sales#46] + +(66) BroadcastExchange +Input [2]: [ca_county#42, web_sales#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(67) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#8] +Right keys [1]: [ca_county#42] +Join type: Inner +Join condition: None + +(68) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#49)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 18] +Input [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] + +(70) Filter [codegen id : 18] +Input [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] +Condition : isnotnull(ws_bill_addr_sk#47) + +(71) ReusedExchange [Reuses operator id: 25] +Output [3]: [d_date_sk#50, d_year#51, d_qoy#52] + +(72) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#49] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(73) Project [codegen id : 18] +Output [4]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52] +Input [6]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49, d_date_sk#50, d_year#51, d_qoy#52] + +(74) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#53, ca_county#54] + +(75) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_bill_addr_sk#47] +Right keys [1]: [ca_address_sk#53] +Join type: Inner +Join condition: None + +(76) Project [codegen id : 18] +Output [4]: [ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_county#54] +Input [6]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_address_sk#53, ca_county#54] + +(77) HashAggregate [codegen id : 18] +Input [4]: [ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_county#54] +Keys [3]: [ca_county#54, d_qoy#52, d_year#51] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#48))] +Aggregate Attributes [1]: [sum#55] +Results [4]: [ca_county#54, d_qoy#52, d_year#51, sum#56] + +(78) Exchange +Input [4]: [ca_county#54, d_qoy#52, d_year#51, sum#56] +Arguments: hashpartitioning(ca_county#54, d_qoy#52, d_year#51, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(79) HashAggregate [codegen id : 19] +Input [4]: [ca_county#54, d_qoy#52, d_year#51, sum#56] +Keys [3]: [ca_county#54, d_qoy#52, d_year#51] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#48))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#48))#45] +Results [2]: [ca_county#54, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#48))#45,17,2) AS web_sales#57] + +(80) BroadcastExchange +Input [2]: [ca_county#54, web_sales#57] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=13] + +(81) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#42] +Right keys [1]: [ca_county#54] +Join type: Inner +Join condition: (CASE WHEN (web_sales#46 > 0.00) THEN (web_sales#57 / web_sales#46) END > CASE WHEN (store_sales#12 > 0.00) THEN (store_sales#23 / store_sales#12) END) + +(82) Project [codegen id : 24] +Output [8]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#42, web_sales#46, web_sales#57] +Input [9]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#42, web_sales#46, ca_county#54, web_sales#57] + +(83) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#60)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 22] +Input [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] + +(85) Filter [codegen id : 22] +Input [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] +Condition : isnotnull(ws_bill_addr_sk#58) + +(86) ReusedExchange [Reuses operator id: 42] +Output [3]: [d_date_sk#61, d_year#62, d_qoy#63] + +(87) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#60] +Right keys [1]: [d_date_sk#61] +Join type: Inner +Join condition: None + +(88) Project [codegen id : 22] +Output [4]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, d_year#62, d_qoy#63] +Input [6]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60, d_date_sk#61, d_year#62, d_qoy#63] + +(89) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#64, ca_county#65] + +(90) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_bill_addr_sk#58] +Right keys [1]: [ca_address_sk#64] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 22] +Output [4]: [ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_county#65] +Input [6]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_address_sk#64, ca_county#65] + +(92) HashAggregate [codegen id : 22] +Input [4]: [ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_county#65] +Keys [3]: [ca_county#65, d_qoy#63, d_year#62] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#59))] +Aggregate Attributes [1]: [sum#66] +Results [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] + +(93) Exchange +Input [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] +Arguments: hashpartitioning(ca_county#65, d_qoy#63, d_year#62, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(94) HashAggregate [codegen id : 23] +Input [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] +Keys [3]: [ca_county#65, d_qoy#63, d_year#62] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#59))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#59))#45] +Results [2]: [ca_county#65, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#59))#45,17,2) AS web_sales#68] + +(95) BroadcastExchange +Input [2]: [ca_county#65, web_sales#68] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=15] + +(96) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#42] +Right keys [1]: [ca_county#65] +Join type: Inner +Join condition: (CASE WHEN (web_sales#57 > 0.00) THEN (web_sales#68 / web_sales#57) END > CASE WHEN (store_sales#23 > 0.00) THEN (store_sales#34 / store_sales#23) END) + +(97) Project [codegen id : 24] +Output [6]: [ca_county#8, d_year#5, (web_sales#57 / web_sales#46) AS web_q1_q2_increase#69, (store_sales#23 / store_sales#12) AS store_q1_q2_increase#70, (web_sales#68 / web_sales#57) AS web_q2_q3_increase#71, (store_sales#34 / store_sales#23) AS store_q2_q3_increase#72] +Input [10]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#42, web_sales#46, web_sales#57, ca_county#65, web_sales#68] + +(98) Exchange +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#69, store_q1_q2_increase#70, web_q2_q3_increase#71, store_q2_q3_increase#72] +Arguments: rangepartitioning(ca_county#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(99) Sort [codegen id : 25] +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#69, store_q1_q2_increase#70, web_q2_q3_increase#71, store_q2_q3_increase#72] +Arguments: [ca_county#8 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5c01b33c93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_datafusion/simplified.txt @@ -0,0 +1,150 @@ +WholeStageCodegen (25) + Sort [ca_county] + InputAdapter + Exchange [ca_county] #1 + WholeStageCodegen (24) + Project [ca_county,d_year,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [ca_county,d_year,store_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen (3) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #6 + WholeStageCodegen (6) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #9 + WholeStageCodegen (10) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #12 + WholeStageCodegen (14) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (19) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #14 + WholeStageCodegen (18) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #7 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #16 + WholeStageCodegen (22) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #10 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..12ca5d428b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/explain.txt @@ -0,0 +1,586 @@ +== Physical Plan == +* Sort (99) ++- Exchange (98) + +- * Project (97) + +- * BroadcastHashJoin Inner BuildRight (96) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin Inner BuildRight (67) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.customer_address (10) + : : : : +- BroadcastExchange (34) + : : : : +- * HashAggregate (33) + : : : : +- Exchange (32) + : : : : +- * HashAggregate (31) + : : : : +- * Project (30) + : : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : : :- * Project (27) + : : : : : +- * BroadcastHashJoin Inner BuildRight (26) + : : : : : :- * Filter (21) + : : : : : : +- * ColumnarToRow (20) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (19) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (28) + : : : +- BroadcastExchange (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * Project (47) + : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : :- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet spark_catalog.default.store_sales (36) + : : : : +- BroadcastExchange (42) + : : : : +- * Filter (41) + : : : : +- * ColumnarToRow (40) + : : : : +- Scan parquet spark_catalog.default.date_dim (39) + : : : +- ReusedExchange (45) + : : +- BroadcastExchange (66) + : : +- * HashAggregate (65) + : : +- Exchange (64) + : : +- * HashAggregate (63) + : : +- * Project (62) + : : +- * BroadcastHashJoin Inner BuildRight (61) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : : +- ReusedExchange (57) + : : +- ReusedExchange (60) + : +- BroadcastExchange (80) + : +- * HashAggregate (79) + : +- Exchange (78) + : +- * HashAggregate (77) + : +- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Project (73) + : : +- * BroadcastHashJoin Inner BuildRight (72) + : : :- * Filter (70) + : : : +- * ColumnarToRow (69) + : : : +- Scan parquet spark_catalog.default.web_sales (68) + : : +- ReusedExchange (71) + : +- ReusedExchange (74) + +- BroadcastExchange (95) + +- * HashAggregate (94) + +- Exchange (93) + +- * HashAggregate (92) + +- * Project (91) + +- * BroadcastHashJoin Inner BuildRight (90) + :- * Project (88) + : +- * BroadcastHashJoin Inner BuildRight (87) + : :- * Filter (85) + : : +- * ColumnarToRow (84) + : : +- Scan parquet spark_catalog.default.web_sales (83) + : +- ReusedExchange (86) + +- ReusedExchange (89) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_addr_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6] +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_year#5, d_qoy#6] + +(10) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_county#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_county#8] + +(12) Filter [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_county#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_county#8)) + +(13) BroadcastExchange +Input [2]: [ca_address_sk#7, ca_county#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_addr_sk#1] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_address_sk#7, ca_county#8] + +(16) HashAggregate [codegen id : 3] +Input [4]: [ss_ext_sales_price#2, d_year#5, d_qoy#6, ca_county#8] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#9] +Results [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] + +(17) Exchange +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] +Arguments: hashpartitioning(ca_county#8, d_qoy#6, d_year#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 24] +Input [4]: [ca_county#8, d_qoy#6, d_year#5, sum#10] +Keys [3]: [ca_county#8, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#11] +Results [3]: [ca_county#8, d_year#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#11,17,2) AS store_sales#12] + +(19) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#15)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] + +(21) Filter [codegen id : 6] +Input [3]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_addr_sk#13) + +(22) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Condition : ((((isnotnull(d_qoy#18) AND isnotnull(d_year#17)) AND (d_qoy#18 = 2)) AND (d_year#17 = 2000)) AND isnotnull(d_date_sk#16)) + +(25) BroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18] +Input [6]: [ss_addr_sk#13, ss_ext_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_year#17, d_qoy#18] + +(28) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#19, ca_county#20] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#13] +Right keys [1]: [ca_address_sk#19] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 6] +Output [4]: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] +Input [6]: [ss_addr_sk#13, ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_address_sk#19, ca_county#20] + +(31) HashAggregate [codegen id : 6] +Input [4]: [ss_ext_sales_price#14, d_year#17, d_qoy#18, ca_county#20] +Keys [3]: [ca_county#20, d_qoy#18, d_year#17] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#14))] +Aggregate Attributes [1]: [sum#21] +Results [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] + +(32) Exchange +Input [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] +Arguments: hashpartitioning(ca_county#20, d_qoy#18, d_year#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 7] +Input [4]: [ca_county#20, d_qoy#18, d_year#17, sum#22] +Keys [3]: [ca_county#20, d_qoy#18, d_year#17] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#14))#11] +Results [2]: [ca_county#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#14))#11,17,2) AS store_sales#23] + +(34) BroadcastExchange +Input [2]: [ca_county#20, store_sales#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(35) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#8] +Right keys [1]: [ca_county#20] +Join type: Inner +Join condition: None + +(36) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 10] +Input [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(38) Filter [codegen id : 10] +Input [3]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_addr_sk#24) + +(39) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] + +(41) Filter [codegen id : 8] +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Condition : ((((isnotnull(d_qoy#29) AND isnotnull(d_year#28)) AND (d_qoy#29 = 3)) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(42) BroadcastExchange +Input [3]: [d_date_sk#27, d_year#28, d_qoy#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(43) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 10] +Output [4]: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29] +Input [6]: [ss_addr_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_year#28, d_qoy#29] + +(45) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#30, ca_county#31] + +(46) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_addr_sk#24] +Right keys [1]: [ca_address_sk#30] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 10] +Output [4]: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] +Input [6]: [ss_addr_sk#24, ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_address_sk#30, ca_county#31] + +(48) HashAggregate [codegen id : 10] +Input [4]: [ss_ext_sales_price#25, d_year#28, d_qoy#29, ca_county#31] +Keys [3]: [ca_county#31, d_qoy#29, d_year#28] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#25))] +Aggregate Attributes [1]: [sum#32] +Results [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] + +(49) Exchange +Input [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] +Arguments: hashpartitioning(ca_county#31, d_qoy#29, d_year#28, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 11] +Input [4]: [ca_county#31, d_qoy#29, d_year#28, sum#33] +Keys [3]: [ca_county#31, d_qoy#29, d_year#28] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#25))#11] +Results [2]: [ca_county#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#25))#11,17,2) AS store_sales#34] + +(51) BroadcastExchange +Input [2]: [ca_county#31, store_sales#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(52) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#20] +Right keys [1]: [ca_county#31] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 24] +Output [5]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34] +Input [7]: [ca_county#8, d_year#5, store_sales#12, ca_county#20, store_sales#23, ca_county#31, store_sales#34] + +(54) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#37)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] + +(56) Filter [codegen id : 14] +Input [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Condition : isnotnull(ws_bill_addr_sk#35) + +(57) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#38, d_year#39, d_qoy#40] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 14] +Output [4]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, d_year#39, d_qoy#40] +Input [6]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38, d_year#39, d_qoy#40] + +(60) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#41, ca_county#42] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_bill_addr_sk#35] +Right keys [1]: [ca_address_sk#41] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 14] +Output [4]: [ws_ext_sales_price#36, d_year#39, d_qoy#40, ca_county#42] +Input [6]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, d_year#39, d_qoy#40, ca_address_sk#41, ca_county#42] + +(63) HashAggregate [codegen id : 14] +Input [4]: [ws_ext_sales_price#36, d_year#39, d_qoy#40, ca_county#42] +Keys [3]: [ca_county#42, d_qoy#40, d_year#39] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum#43] +Results [4]: [ca_county#42, d_qoy#40, d_year#39, sum#44] + +(64) Exchange +Input [4]: [ca_county#42, d_qoy#40, d_year#39, sum#44] +Arguments: hashpartitioning(ca_county#42, d_qoy#40, d_year#39, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(65) HashAggregate [codegen id : 15] +Input [4]: [ca_county#42, d_qoy#40, d_year#39, sum#44] +Keys [3]: [ca_county#42, d_qoy#40, d_year#39] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] +Results [2]: [ca_county#42, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS web_sales#46] + +(66) BroadcastExchange +Input [2]: [ca_county#42, web_sales#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(67) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#8] +Right keys [1]: [ca_county#42] +Join type: Inner +Join condition: None + +(68) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#49)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 18] +Input [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] + +(70) Filter [codegen id : 18] +Input [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] +Condition : isnotnull(ws_bill_addr_sk#47) + +(71) ReusedExchange [Reuses operator id: 25] +Output [3]: [d_date_sk#50, d_year#51, d_qoy#52] + +(72) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#49] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(73) Project [codegen id : 18] +Output [4]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52] +Input [6]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49, d_date_sk#50, d_year#51, d_qoy#52] + +(74) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#53, ca_county#54] + +(75) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_bill_addr_sk#47] +Right keys [1]: [ca_address_sk#53] +Join type: Inner +Join condition: None + +(76) Project [codegen id : 18] +Output [4]: [ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_county#54] +Input [6]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_address_sk#53, ca_county#54] + +(77) HashAggregate [codegen id : 18] +Input [4]: [ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_county#54] +Keys [3]: [ca_county#54, d_qoy#52, d_year#51] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#48))] +Aggregate Attributes [1]: [sum#55] +Results [4]: [ca_county#54, d_qoy#52, d_year#51, sum#56] + +(78) Exchange +Input [4]: [ca_county#54, d_qoy#52, d_year#51, sum#56] +Arguments: hashpartitioning(ca_county#54, d_qoy#52, d_year#51, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(79) HashAggregate [codegen id : 19] +Input [4]: [ca_county#54, d_qoy#52, d_year#51, sum#56] +Keys [3]: [ca_county#54, d_qoy#52, d_year#51] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#48))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#48))#45] +Results [2]: [ca_county#54, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#48))#45,17,2) AS web_sales#57] + +(80) BroadcastExchange +Input [2]: [ca_county#54, web_sales#57] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=13] + +(81) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#42] +Right keys [1]: [ca_county#54] +Join type: Inner +Join condition: (CASE WHEN (web_sales#46 > 0.00) THEN (web_sales#57 / web_sales#46) END > CASE WHEN (store_sales#12 > 0.00) THEN (store_sales#23 / store_sales#12) END) + +(82) Project [codegen id : 24] +Output [8]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#42, web_sales#46, web_sales#57] +Input [9]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#42, web_sales#46, ca_county#54, web_sales#57] + +(83) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#60)] +PushedFilters: [IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 22] +Input [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] + +(85) Filter [codegen id : 22] +Input [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] +Condition : isnotnull(ws_bill_addr_sk#58) + +(86) ReusedExchange [Reuses operator id: 42] +Output [3]: [d_date_sk#61, d_year#62, d_qoy#63] + +(87) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#60] +Right keys [1]: [d_date_sk#61] +Join type: Inner +Join condition: None + +(88) Project [codegen id : 22] +Output [4]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, d_year#62, d_qoy#63] +Input [6]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60, d_date_sk#61, d_year#62, d_qoy#63] + +(89) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#64, ca_county#65] + +(90) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_bill_addr_sk#58] +Right keys [1]: [ca_address_sk#64] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 22] +Output [4]: [ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_county#65] +Input [6]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_address_sk#64, ca_county#65] + +(92) HashAggregate [codegen id : 22] +Input [4]: [ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_county#65] +Keys [3]: [ca_county#65, d_qoy#63, d_year#62] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#59))] +Aggregate Attributes [1]: [sum#66] +Results [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] + +(93) Exchange +Input [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] +Arguments: hashpartitioning(ca_county#65, d_qoy#63, d_year#62, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(94) HashAggregate [codegen id : 23] +Input [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] +Keys [3]: [ca_county#65, d_qoy#63, d_year#62] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#59))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#59))#45] +Results [2]: [ca_county#65, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#59))#45,17,2) AS web_sales#68] + +(95) BroadcastExchange +Input [2]: [ca_county#65, web_sales#68] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=15] + +(96) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#42] +Right keys [1]: [ca_county#65] +Join type: Inner +Join condition: (CASE WHEN (web_sales#57 > 0.00) THEN (web_sales#68 / web_sales#57) END > CASE WHEN (store_sales#23 > 0.00) THEN (store_sales#34 / store_sales#23) END) + +(97) Project [codegen id : 24] +Output [6]: [ca_county#8, d_year#5, (web_sales#57 / web_sales#46) AS web_q1_q2_increase#69, (store_sales#23 / store_sales#12) AS store_q1_q2_increase#70, (web_sales#68 / web_sales#57) AS web_q2_q3_increase#71, (store_sales#34 / store_sales#23) AS store_q2_q3_increase#72] +Input [10]: [ca_county#8, d_year#5, store_sales#12, store_sales#23, store_sales#34, ca_county#42, web_sales#46, web_sales#57, ca_county#65, web_sales#68] + +(98) Exchange +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#69, store_q1_q2_increase#70, web_q2_q3_increase#71, store_q2_q3_increase#72] +Arguments: rangepartitioning(ca_county#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(99) Sort [codegen id : 25] +Input [6]: [ca_county#8, d_year#5, web_q1_q2_increase#69, store_q1_q2_increase#70, web_q2_q3_increase#71, store_q2_q3_increase#72] +Arguments: [ca_county#8 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..5c01b33c93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.native_iceberg_compat/simplified.txt @@ -0,0 +1,150 @@ +WholeStageCodegen (25) + Sort [ca_county] + InputAdapter + Exchange [ca_county] #1 + WholeStageCodegen (24) + Project [ca_county,d_year,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [ca_county,d_year,store_sales,store_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen (3) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #6 + WholeStageCodegen (6) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #9 + WholeStageCodegen (10) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #12 + WholeStageCodegen (14) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (19) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #14 + WholeStageCodegen (18) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #7 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #16 + WholeStageCodegen (22) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #10 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/explain.txt new file mode 100644 index 0000000000..65288fb634 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/explain.txt @@ -0,0 +1,197 @@ +== Physical Plan == +* HashAggregate (33) ++- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.catalog_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 6] +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#1) AND isnotnull(cs_ext_discount_amt#2)) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Input [4]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] + +(11) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] + +(13) Filter [codegen id : 3] +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Condition : isnotnull(cs_item_sk#6) + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(18) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Input [4]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8, d_date_sk#9] + +(21) HashAggregate [codegen id : 3] +Input [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Keys [1]: [cs_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#7))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [cs_item_sk#6, sum#13, count#14] + +(22) Exchange +Input [3]: [cs_item_sk#6, sum#13, count#14] +Arguments: hashpartitioning(cs_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) HashAggregate [codegen id : 4] +Input [3]: [cs_item_sk#6, sum#13, count#14] +Keys [1]: [cs_item_sk#6] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))#15] +Results [2]: [(1.3 * cast((avg(UnscaledValue(cs_ext_discount_amt#7))#15 / 100.0) as decimal(11,6))) AS (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] + +(24) Filter [codegen id : 4] +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#16) + +(25) BroadcastExchange +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [cs_item_sk#6] +Join type: Inner +Join condition: (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#16) + +(27) Project [codegen id : 6] +Output [2]: [cs_ext_discount_amt#2, cs_sold_date_sk#3] +Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4, (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#17] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [cs_ext_discount_amt#2] +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, d_date_sk#17] + +(31) HashAggregate [codegen id : 6] +Input [1]: [cs_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] + +(32) Exchange +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#19] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(cs_ext_discount_amt#2))#20,17,2) AS excess discount amount#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5e7d3a0c11 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (7) + HashAggregate [sum] [sum(UnscaledValue(cs_ext_discount_amt)),excess discount amount,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [cs_ext_discount_amt] [sum,sum] + Project [cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] + Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_ext_discount_amt] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Filter [(1.3 * avg(cs_ext_discount_amt))] + HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..65288fb634 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/explain.txt @@ -0,0 +1,197 @@ +== Physical Plan == +* HashAggregate (33) ++- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.catalog_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 6] +Input [3]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#1) AND isnotnull(cs_ext_discount_amt#2)) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] +Input [4]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4] + +(11) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] + +(13) Filter [codegen id : 3] +Input [3]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8] +Condition : isnotnull(cs_item_sk#6) + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(18) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Input [4]: [cs_item_sk#6, cs_ext_discount_amt#7, cs_sold_date_sk#8, d_date_sk#9] + +(21) HashAggregate [codegen id : 3] +Input [2]: [cs_item_sk#6, cs_ext_discount_amt#7] +Keys [1]: [cs_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#7))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [cs_item_sk#6, sum#13, count#14] + +(22) Exchange +Input [3]: [cs_item_sk#6, sum#13, count#14] +Arguments: hashpartitioning(cs_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) HashAggregate [codegen id : 4] +Input [3]: [cs_item_sk#6, sum#13, count#14] +Keys [1]: [cs_item_sk#6] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#7))#15] +Results [2]: [(1.3 * cast((avg(UnscaledValue(cs_ext_discount_amt#7))#15 / 100.0) as decimal(11,6))) AS (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] + +(24) Filter [codegen id : 4] +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#16) + +(25) BroadcastExchange +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [cs_item_sk#6] +Join type: Inner +Join condition: (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#16) + +(27) Project [codegen id : 6] +Output [2]: [cs_ext_discount_amt#2, cs_sold_date_sk#3] +Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#4, (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#6] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#17] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [cs_ext_discount_amt#2] +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, d_date_sk#17] + +(31) HashAggregate [codegen id : 6] +Input [1]: [cs_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] + +(32) Exchange +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#19] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(cs_ext_discount_amt#2))#20,17,2) AS excess discount amount#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..5e7d3a0c11 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (7) + HashAggregate [sum] [sum(UnscaledValue(cs_ext_discount_amt)),excess discount amount,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [cs_ext_discount_amt] [sum,sum] + Project [cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] + Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_ext_discount_amt] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Filter [(1.3 * avg(cs_ext_discount_amt))] + HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/explain.txt new file mode 100644 index 0000000000..7b096d51d4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet spark_catalog.default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#10, i_manufact_id#11] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#10, i_manufact_id#11] +Condition : isnotnull(i_item_sk#10) + +(21) Scan parquet spark_catalog.default.item +Output [2]: [i_category#12, i_manufact_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics )] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_category#12, i_manufact_id#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_category#12, i_manufact_id#13] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Electronics )) + +(24) Project [codegen id : 3] +Output [1]: [i_manufact_id#13] +Input [2]: [i_category#12, i_manufact_id#13] + +(25) BroadcastExchange +Input [1]: [i_manufact_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_manufact_id#11] +Right keys [1]: [i_manufact_id#13] +Join type: LeftSemi +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#10] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_manufact_id#11] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_manufact_id#11, sum#15] + +(31) Exchange +Input [2]: [i_manufact_id#11, sum#15] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_manufact_id#11, sum#15] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_manufact_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#21)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(35) Filter [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#23] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#18] +Right keys [1]: [ca_address_sk#23] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#24, i_manufact_id#25] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#20, i_manufact_id#25] +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_manufact_id#25] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#20, i_manufact_id#25] +Keys [1]: [i_manufact_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_manufact_id#25, sum#27] + +(46) Exchange +Input [2]: [i_manufact_id#25, sum#27] +Arguments: hashpartitioning(i_manufact_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_manufact_id#25, sum#27] +Keys [1]: [i_manufact_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_manufact_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(48) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] + +(50) Filter [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_bill_addr_sk#31) AND isnotnull(ws_item_sk#30)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#34] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#34] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32] +Input [5]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#35] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#31] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#30, ws_ext_sales_price#32] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ca_address_sk#35] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#36, i_manufact_id#37] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#30] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#32, i_manufact_id#37] +Input [4]: [ws_item_sk#30, ws_ext_sales_price#32, i_item_sk#36, i_manufact_id#37] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#32, i_manufact_id#37] +Keys [1]: [i_manufact_id#37] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum#38] +Results [2]: [i_manufact_id#37, sum#39] + +(61) Exchange +Input [2]: [i_manufact_id#37, sum#39] +Arguments: hashpartitioning(i_manufact_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_manufact_id#37, sum#39] +Keys [1]: [i_manufact_id#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#40] +Results [2]: [i_manufact_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#40,17,2) AS total_sales#41] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_manufact_id#11, total_sales#17] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [3]: [i_manufact_id#11, sum#44, isEmpty#45] + +(65) Exchange +Input [3]: [i_manufact_id#11, sum#44, isEmpty#45] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_manufact_id#11, sum#44, isEmpty#45] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#46] +Results [2]: [i_manufact_id#11, sum(total_sales#17)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_manufact_id#11, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#11, total_sales#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/simplified.txt new file mode 100644 index 0000000000..49d9592d2d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_datafusion/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [total_sales,i_manufact_id] + WholeStageCodegen (20) + HashAggregate [i_manufact_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (19) + HashAggregate [i_manufact_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen (5) + HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_manufact_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_category,i_manufact_id] + WholeStageCodegen (12) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen (11) + HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #5 + WholeStageCodegen (18) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #8 + WholeStageCodegen (17) + HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7b096d51d4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet spark_catalog.default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#10, i_manufact_id#11] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#10, i_manufact_id#11] +Condition : isnotnull(i_item_sk#10) + +(21) Scan parquet spark_catalog.default.item +Output [2]: [i_category#12, i_manufact_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics )] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_category#12, i_manufact_id#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_category#12, i_manufact_id#13] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Electronics )) + +(24) Project [codegen id : 3] +Output [1]: [i_manufact_id#13] +Input [2]: [i_category#12, i_manufact_id#13] + +(25) BroadcastExchange +Input [1]: [i_manufact_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_manufact_id#11] +Right keys [1]: [i_manufact_id#13] +Join type: LeftSemi +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#10, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#10] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_manufact_id#11] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_manufact_id#11, sum#15] + +(31) Exchange +Input [2]: [i_manufact_id#11, sum#15] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_manufact_id#11, sum#15] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_manufact_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#21)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(35) Filter [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#23] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#18] +Right keys [1]: [ca_address_sk#23] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#24, i_manufact_id#25] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#20, i_manufact_id#25] +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_manufact_id#25] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#20, i_manufact_id#25] +Keys [1]: [i_manufact_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_manufact_id#25, sum#27] + +(46) Exchange +Input [2]: [i_manufact_id#25, sum#27] +Arguments: hashpartitioning(i_manufact_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_manufact_id#25, sum#27] +Keys [1]: [i_manufact_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_manufact_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(48) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] + +(50) Filter [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_bill_addr_sk#31) AND isnotnull(ws_item_sk#30)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#34] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#34] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32] +Input [5]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#35] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#31] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#30, ws_ext_sales_price#32] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ca_address_sk#35] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#36, i_manufact_id#37] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#30] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#32, i_manufact_id#37] +Input [4]: [ws_item_sk#30, ws_ext_sales_price#32, i_item_sk#36, i_manufact_id#37] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#32, i_manufact_id#37] +Keys [1]: [i_manufact_id#37] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum#38] +Results [2]: [i_manufact_id#37, sum#39] + +(61) Exchange +Input [2]: [i_manufact_id#37, sum#39] +Arguments: hashpartitioning(i_manufact_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_manufact_id#37, sum#39] +Keys [1]: [i_manufact_id#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#40] +Results [2]: [i_manufact_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#40,17,2) AS total_sales#41] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_manufact_id#11, total_sales#17] +Keys [1]: [i_manufact_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [3]: [i_manufact_id#11, sum#44, isEmpty#45] + +(65) Exchange +Input [3]: [i_manufact_id#11, sum#44, isEmpty#45] +Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_manufact_id#11, sum#44, isEmpty#45] +Keys [1]: [i_manufact_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#46] +Results [2]: [i_manufact_id#11, sum(total_sales#17)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_manufact_id#11, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#11, total_sales#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..49d9592d2d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.native_iceberg_compat/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [total_sales,i_manufact_id] + WholeStageCodegen (20) + HashAggregate [i_manufact_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (19) + HashAggregate [i_manufact_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen (5) + HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_manufact_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_category,i_manufact_id] + WholeStageCodegen (12) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen (11) + HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #5 + WholeStageCodegen (18) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #8 + WholeStageCodegen (17) + HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/explain.txt new file mode 100644 index 0000000000..03e931787b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.customer (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_county#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#1, ss_ticket_number#4] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] + +(26) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#17 AS cnt#18] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18] +Condition : ((cnt#18 >= 15) AND (cnt#18 <= 20)) + +(29) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Condition : isnotnull(c_customer_sk#19) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18, c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(35) Exchange +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: rangepartitioning(c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: [c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/simplified.txt new file mode 100644 index 0000000000..048a2e5a32 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_datafusion/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] + InputAdapter + Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [cnt] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..03e931787b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.customer (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_county#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#1, ss_ticket_number#4] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] + +(26) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#17 AS cnt#18] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18] +Condition : ((cnt#18 >= 15) AND (cnt#18 <= 20)) + +(29) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Condition : isnotnull(c_customer_sk#19) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18, c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(35) Exchange +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: rangepartitioning(c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: [c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..048a2e5a32 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.native_iceberg_compat/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] + InputAdapter + Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [cnt] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/explain.txt new file mode 100644 index 0000000000..6140e1584c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/explain.txt @@ -0,0 +1,267 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (30) + : : +- * Filter (29) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (28) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.customer_address (31) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.customer_demographics (37) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(10) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#13] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#11] +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#16] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#14] +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(29) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(30) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(33) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(34) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#18] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17, ca_state#18] + +(37) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(39) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(40) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 9] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#4, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(43) HashAggregate [codegen id : 9] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#22), partial_max(cd_dep_count#22), partial_avg(cd_dep_count#22), partial_min(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_avg(cd_dep_employed_count#23), partial_min(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_avg(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, min#26, max#27, sum#28, count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] + +(44) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 10] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), min(cd_dep_count#22), max(cd_dep_count#22), avg(cd_dep_count#22), min(cd_dep_employed_count#23), max(cd_dep_employed_count#23), avg(cd_dep_employed_count#23), min(cd_dep_college_count#24), max(cd_dep_college_count#24), avg(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, min(cd_dep_count#22)#52, max(cd_dep_count#22)#53, avg(cd_dep_count#22)#54, min(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, avg(cd_dep_employed_count#23)#57, min(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, avg(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, count(1)#51 AS cnt1#61, min(cd_dep_count#22)#52 AS min(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, avg(cd_dep_count#22)#54 AS avg(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, min(cd_dep_employed_count#23)#55 AS min(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, avg(cd_dep_employed_count#23)#57 AS avg(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, min(cd_dep_college_count#24)#58 AS min(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, avg(cd_dep_college_count#24)#60 AS avg(cd_dep_college_count)#72, cd_dep_count#22] + +(46) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cnt1#61, min(cd_dep_count)#62, max(cd_dep_count)#63, avg(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, min(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, avg(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, min(cd_dep_college_count)#70, max(cd_dep_college_count)#71, avg(cd_dep_college_count)#72, cd_dep_count#22] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cnt1#61, min(cd_dep_count)#62, max(cd_dep_count)#63, avg(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, min(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, avg(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, min(cd_dep_college_count)#70, max(cd_dep_college_count)#71, avg(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c00f3f58f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_datafusion/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] [count(1),min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,min,max,sum,count,min,max,sum,count,min,max,sum,count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6140e1584c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/explain.txt @@ -0,0 +1,267 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (30) + : : +- * Filter (29) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (28) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.customer_address (31) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.customer_demographics (37) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(10) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#13] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#11] +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#16] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#14] +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(29) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(30) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(33) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(34) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#18] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17, ca_state#18] + +(37) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(39) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(40) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 9] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#4, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(43) HashAggregate [codegen id : 9] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#22), partial_max(cd_dep_count#22), partial_avg(cd_dep_count#22), partial_min(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_avg(cd_dep_employed_count#23), partial_min(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_avg(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, min#26, max#27, sum#28, count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] + +(44) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 10] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, min#39, max#40, sum#41, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), min(cd_dep_count#22), max(cd_dep_count#22), avg(cd_dep_count#22), min(cd_dep_employed_count#23), max(cd_dep_employed_count#23), avg(cd_dep_employed_count#23), min(cd_dep_college_count#24), max(cd_dep_college_count#24), avg(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, min(cd_dep_count#22)#52, max(cd_dep_count#22)#53, avg(cd_dep_count#22)#54, min(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, avg(cd_dep_employed_count#23)#57, min(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, avg(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, count(1)#51 AS cnt1#61, min(cd_dep_count#22)#52 AS min(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, avg(cd_dep_count#22)#54 AS avg(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, min(cd_dep_employed_count#23)#55 AS min(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, avg(cd_dep_employed_count#23)#57 AS avg(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, min(cd_dep_college_count#24)#58 AS min(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, avg(cd_dep_college_count#24)#60 AS avg(cd_dep_college_count)#72, cd_dep_count#22] + +(46) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cnt1#61, min(cd_dep_count)#62, max(cd_dep_count)#63, avg(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, min(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, avg(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, min(cd_dep_college_count)#70, max(cd_dep_college_count)#71, avg(cd_dep_college_count)#72, cd_dep_count#22] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cnt1#61, min(cd_dep_count)#62, max(cd_dep_count)#63, avg(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, min(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, avg(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, min(cd_dep_college_count)#70, max(cd_dep_college_count)#71, avg(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c00f3f58f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.native_iceberg_compat/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] [count(1),min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,min,max,sum,count,min,max,sum,count,min,max,sum,count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/explain.txt new file mode 100644 index 0000000000..806e421651 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- Window (30) + +- * Sort (29) + +- Exchange (28) + +- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Expand (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.store (17) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#8] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_state#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_state#12] + +(21) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] + +(24) Expand [codegen id : 4] +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] + +(25) HashAggregate [codegen id : 4] +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] + +(26) Exchange +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 5] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] +Results [7]: [(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2)) AS gross_margin#22, i_category#13, i_class#14, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS lochierarchy#23, (MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2)) AS _w0#24, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS _w1#25, CASE WHEN (cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint) = 0) THEN i_category#13 END AS _w2#26] + +(28) Exchange +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: hashpartitioning(_w1#25, _w2#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 6] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: [_w1#25 ASC NULLS FIRST, _w2#26 ASC NULLS FIRST, _w0#24 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: [rank(_w0#24) windowspecdefinition(_w1#25, _w2#26, _w0#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#27], [_w1#25, _w2#26], [_w0#24 ASC NULLS FIRST] + +(31) Project [codegen id : 7] +Output [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Input [8]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26, rank_within_parent#27] + +(32) TakeOrderedAndProject +Input [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Arguments: 100, [lochierarchy#23 DESC NULLS LAST, CASE WHEN (lochierarchy#23 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#27 ASC NULLS FIRST], [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/simplified.txt new file mode 100644 index 0000000000..61d30a3e81 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (7) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (6) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,lochierarchy,_w0,_w1,_w2,sum,sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] + Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..806e421651 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- Window (30) + +- * Sort (29) + +- Exchange (28) + +- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Expand (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.store (17) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#8] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_state#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_state#12] + +(21) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] + +(24) Expand [codegen id : 4] +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] + +(25) HashAggregate [codegen id : 4] +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] + +(26) Exchange +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 5] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] +Results [7]: [(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2)) AS gross_margin#22, i_category#13, i_class#14, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS lochierarchy#23, (MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2)) AS _w0#24, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS _w1#25, CASE WHEN (cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint) = 0) THEN i_category#13 END AS _w2#26] + +(28) Exchange +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: hashpartitioning(_w1#25, _w2#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 6] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: [_w1#25 ASC NULLS FIRST, _w2#26 ASC NULLS FIRST, _w0#24 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26] +Arguments: [rank(_w0#24) windowspecdefinition(_w1#25, _w2#26, _w0#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#27], [_w1#25, _w2#26], [_w0#24 ASC NULLS FIRST] + +(31) Project [codegen id : 7] +Output [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Input [8]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w0#24, _w1#25, _w2#26, rank_within_parent#27] + +(32) TakeOrderedAndProject +Input [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Arguments: 100, [lochierarchy#23 DESC NULLS LAST, CASE WHEN (lochierarchy#23 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#27 ASC NULLS FIRST], [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..61d30a3e81 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (7) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (6) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,lochierarchy,_w0,_w1,_w2,sum,sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] + Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/explain.txt new file mode 100644 index 0000000000..326f4cb338 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/explain.txt @@ -0,0 +1,169 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildLeft (24) + :- BroadcastExchange (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + +- * Project (23) + +- * Filter (22) + +- * ColumnarToRow (21) + +- Scan parquet spark_catalog.default.catalog_sales (20) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), LessThanOrEqual(i_current_price,98.00), In(i_manufact_id, [677,694,808,940]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (i_current_price#4 <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#8)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_item_sk#6, inv_date_sk#8] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(9) BroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#6] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] + +(12) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-02-01)) AND (d_date#10 <= 2000-04-01)) AND isnotnull(d_date_sk#9)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(16) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] + +(19) BroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] + +(22) Filter +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) + +(23) Project +Output [1]: [cs_item_sk#11] +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] + +(26) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(27) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(29) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2d05feb776 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen (5) + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,cs_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (3) + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [inv_item_sk,inv_date_sk] + Filter [inv_quantity_on_hand,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + Project [cs_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..326f4cb338 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/explain.txt @@ -0,0 +1,169 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildLeft (24) + :- BroadcastExchange (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + +- * Project (23) + +- * Filter (22) + +- * ColumnarToRow (21) + +- Scan parquet spark_catalog.default.catalog_sales (20) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), LessThanOrEqual(i_current_price,98.00), In(i_manufact_id, [677,694,808,940]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (i_current_price#4 <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#8)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_item_sk#6, inv_date_sk#8] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(9) BroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#6] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] + +(12) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-02-01)) AND (d_date#10 <= 2000-04-01)) AND isnotnull(d_date_sk#9)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(16) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] + +(19) BroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] + +(22) Filter +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) + +(23) Project +Output [1]: [cs_item_sk#11] +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] + +(26) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(27) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(29) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2d05feb776 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.native_iceberg_compat/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen (5) + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,cs_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (3) + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [inv_item_sk,inv_date_sk] + Filter [inv_quantity_on_hand,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + Project [cs_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/explain.txt new file mode 100644 index 0000000000..d5e6da17aa --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin LeftSemi BuildRight (47) + :- * BroadcastHashJoin LeftSemi BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + +- BroadcastExchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (37) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#2)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] + +(3) Filter [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#2] +Right keys [1]: [d_date_sk#3] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_customer_sk#1, d_date#4] +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) Exchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] + +(22) Filter [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#11, d_date#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] + +(26) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] + +(29) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(30) Exchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(32) BroadcastExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] +Join type: LeftSemi +Join condition: None + +(34) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#17)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] + +(36) Filter [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) + +(37) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#18, d_date#19] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] + +(40) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] + +(43) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(44) Exchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(46) BroadcastExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] +Join type: LeftSemi +Join condition: None + +(48) Project [codegen id : 12] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(50) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 13] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9122ac943b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d5e6da17aa --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin LeftSemi BuildRight (47) + :- * BroadcastHashJoin LeftSemi BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + +- BroadcastExchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (37) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#2)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] + +(3) Filter [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#2] +Right keys [1]: [d_date_sk#3] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_customer_sk#1, d_date#4] +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) Exchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] + +(22) Filter [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#11, d_date#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] + +(26) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] + +(29) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(30) Exchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(32) BroadcastExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] +Join type: LeftSemi +Join condition: None + +(34) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#17)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] + +(36) Filter [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) + +(37) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#18, d_date#19] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] + +(40) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] + +(43) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(44) Exchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(46) BroadcastExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] +Join type: LeftSemi +Join condition: None + +(48) Project [codegen id : 12] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(50) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 13] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9122ac943b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/explain.txt new file mode 100644 index 0000000000..e203fa858c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet spark_catalog.default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.date_dim (37) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#8, d_moy#10] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(20) BroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] + +(24) Exchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] + +(28) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#29)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] + +(30) Filter [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#30] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=7] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#6] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..269ca30df4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (11) + Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] + InputAdapter + Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (8) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e203fa858c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet spark_catalog.default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.date_dim (37) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#8, d_moy#10] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(20) BroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] + +(24) Exchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] + +(28) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#29)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] + +(30) Filter [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#30] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=7] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#6] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..269ca30df4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (11) + Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] + InputAdapter + Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (8) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/explain.txt new file mode 100644 index 0000000000..f7800f1f97 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet spark_catalog.default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.date_dim (37) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#8, d_moy#10] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(20) BroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] + +(24) Exchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] +Condition : (CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END AND CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.5) END) + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] + +(28) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#29)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] + +(30) Filter [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#30] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=7] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#6] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/simplified.txt new file mode 100644 index 0000000000..269ca30df4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (11) + Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] + InputAdapter + Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (8) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f7800f1f97 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/explain.txt @@ -0,0 +1,301 @@ +== Physical Plan == +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet spark_catalog.default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.date_dim (37) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#8, d_moy#10] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(20) BroadcastExchange +Input [2]: [d_date_sk#8, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_date_sk#8, d_moy#10] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#5, w_warehouse_sk#6, w_warehouse_name#7, d_moy#10] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] + +(24) Exchange +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#7, w_warehouse_sk#6, i_item_sk#5, d_moy#10] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] +Condition : (CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END AND CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.5) END) + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, stdev#23, mean#24] + +(28) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#29)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] + +(30) Filter [codegen id : 8] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#30] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#23, mean#24] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=7] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#6] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join type: Inner +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#6, i_item_sk#5, d_moy#10, mean#24, cov#25, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#46, cov#47] +Arguments: [w_warehouse_sk#6 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..269ca30df4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (11) + Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] + InputAdapter + Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Filter [mean,stdev] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (8) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,inv_date_sk,i_item_sk,w_warehouse_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + ReusedExchange [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/explain.txt new file mode 100644 index 0000000000..b21f0f54f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/explain.txt @@ -0,0 +1,616 @@ +== Physical Plan == +TakeOrderedAndProject (105) ++- * Project (104) + +- * BroadcastHashJoin Inner BuildRight (103) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * Project (70) + : : +- * BroadcastHashJoin Inner BuildRight (69) + : : :- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- * Filter (19) + : : : : : +- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : : : +- BroadcastExchange (35) + : : : : +- * HashAggregate (34) + : : : : +- Exchange (33) + : : : : +- * HashAggregate (32) + : : : : +- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Project (25) + : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : :- * Filter (22) + : : : : : : +- * ColumnarToRow (21) + : : : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : : : +- ReusedExchange (23) + : : : : +- BroadcastExchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet spark_catalog.default.date_dim (26) + : : : +- BroadcastExchange (53) + : : : +- * Filter (52) + : : : +- * HashAggregate (51) + : : : +- Exchange (50) + : : : +- * HashAggregate (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet spark_catalog.default.customer (37) + : : : : +- BroadcastExchange (43) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (40) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (68) + : : +- * HashAggregate (67) + : : +- Exchange (66) + : : +- * HashAggregate (65) + : : +- * Project (64) + : : +- * BroadcastHashJoin Inner BuildRight (63) + : : :- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet spark_catalog.default.customer (56) + : : : +- ReusedExchange (59) + : : +- ReusedExchange (62) + : +- BroadcastExchange (87) + : +- * Filter (86) + : +- * HashAggregate (85) + : +- Exchange (84) + : +- * HashAggregate (83) + : +- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * Project (79) + : : +- * BroadcastHashJoin Inner BuildRight (78) + : : :- * Filter (73) + : : : +- * ColumnarToRow (72) + : : : +- Scan parquet spark_catalog.default.customer (71) + : : +- BroadcastExchange (77) + : : +- * Filter (76) + : : +- * ColumnarToRow (75) + : : +- Scan parquet spark_catalog.default.web_sales (74) + : +- ReusedExchange (80) + +- BroadcastExchange (102) + +- * HashAggregate (101) + +- Exchange (100) + +- * HashAggregate (99) + +- * Project (98) + +- * BroadcastHashJoin Inner BuildRight (97) + :- * Project (95) + : +- * BroadcastHashJoin Inner BuildRight (94) + : :- * Filter (92) + : : +- * ColumnarToRow (91) + : : +- Scan parquet spark_catalog.default.customer (90) + : +- ReusedExchange (93) + +- ReusedExchange (96) + + +(1) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#14)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(6) Filter [codegen id : 1] +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#9) + +(7) BroadcastExchange +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#15, d_year#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#15, d_year#16] + +(16) HashAggregate [codegen id : 3] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [partial_sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] +Aggregate Attributes [2]: [sum#17, isEmpty#18] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] + +(17) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 24] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] +Aggregate Attributes [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))#21] +Results [2]: [c_customer_id#2 AS customer_id#22, sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))#21 AS year_total#23] + +(19) Filter [codegen id : 24] +Input [2]: [customer_id#22, year_total#23] +Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.000000)) + +(20) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_customer_id#25)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [6]: [ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#24] +Right keys [1]: [ss_customer_sk#32] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#38, d_year#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#38, d_year#39] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#38, d_year#39] +Condition : ((isnotnull(d_year#39) AND (d_year#39 = 2002)) AND isnotnull(d_date_sk#38)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#38, d_year#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] +Input [14]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37, d_date_sk#38, d_year#39] + +(32) HashAggregate [codegen id : 6] +Input [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] +Keys [8]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39] +Functions [1]: [partial_sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] + +(33) Exchange +Input [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] +Arguments: hashpartitioning(c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] +Keys [8]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39] +Functions [1]: [sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))] +Aggregate Attributes [1]: [sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))#21] +Results [8]: [c_customer_id#25 AS customer_id#44, c_first_name#26 AS customer_first_name#45, c_last_name#27 AS customer_last_name#46, c_preferred_cust_flag#28 AS customer_preferred_cust_flag#47, c_birth_country#29 AS customer_birth_country#48, c_login#30 AS customer_login#49, c_email_address#31 AS customer_email_address#50, sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))#21 AS year_total#51] + +(35) BroadcastExchange +Input [8]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#44] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59] +Condition : (isnotnull(c_customer_sk#52) AND isnotnull(c_customer_id#53)) + +(40) Scan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#65)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] + +(42) Filter [codegen id : 8] +Input [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Condition : isnotnull(cs_bill_customer_sk#60) + +(43) BroadcastExchange +Input [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#52] +Right keys [1]: [cs_bill_customer_sk#60] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [12]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Input [14]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#66, d_year#67] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [12]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, d_year#67] +Input [14]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65, d_date_sk#66, d_year#67] + +(49) HashAggregate [codegen id : 10] +Input [12]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, d_year#67] +Keys [8]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67] +Functions [1]: [partial_sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))] +Aggregate Attributes [2]: [sum#68, isEmpty#69] +Results [10]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, sum#70, isEmpty#71] + +(50) Exchange +Input [10]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, sum#70, isEmpty#71] +Arguments: hashpartitioning(c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [10]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, sum#70, isEmpty#71] +Keys [8]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67] +Functions [1]: [sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))] +Aggregate Attributes [1]: [sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))#72] +Results [2]: [c_customer_id#53 AS customer_id#73, sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))#72 AS year_total#74] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#73, year_total#74] +Condition : (isnotnull(year_total#74) AND (year_total#74 > 0.000000)) + +(53) BroadcastExchange +Input [2]: [customer_id#73, year_total#74] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#73] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 24] +Output [11]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, year_total#74] +Input [12]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, customer_id#73, year_total#74] + +(56) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82] + +(58) Filter [codegen id : 14] +Input [8]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82] +Condition : (isnotnull(c_customer_sk#75) AND isnotnull(c_customer_id#76)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [6]: [cs_bill_customer_sk#83, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#75] +Right keys [1]: [cs_bill_customer_sk#83] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [12]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88] +Input [14]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_bill_customer_sk#83, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#89, d_year#90] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [cs_sold_date_sk#88] +Right keys [1]: [d_date_sk#89] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [12]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, d_year#90] +Input [14]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88, d_date_sk#89, d_year#90] + +(65) HashAggregate [codegen id : 14] +Input [12]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, d_year#90] +Keys [8]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90] +Functions [1]: [partial_sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))] +Aggregate Attributes [2]: [sum#91, isEmpty#92] +Results [10]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, sum#93, isEmpty#94] + +(66) Exchange +Input [10]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, sum#93, isEmpty#94] +Arguments: hashpartitioning(c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [10]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, sum#93, isEmpty#94] +Keys [8]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90] +Functions [1]: [sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))] +Aggregate Attributes [1]: [sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))#72] +Results [2]: [c_customer_id#76 AS customer_id#95, sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))#72 AS year_total#96] + +(68) BroadcastExchange +Input [2]: [customer_id#95, year_total#96] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#95] +Join type: Inner +Join condition: (CASE WHEN (year_total#74 > 0.000000) THEN (year_total#96 / year_total#74) END > CASE WHEN (year_total#23 > 0.000000) THEN (year_total#51 / year_total#23) END) + +(70) Project [codegen id : 24] +Output [10]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96] +Input [13]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, year_total#74, customer_id#95, year_total#96] + +(71) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 18] +Input [8]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104] + +(73) Filter [codegen id : 18] +Input [8]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104] +Condition : (isnotnull(c_customer_sk#97) AND isnotnull(c_customer_id#98)) + +(74) Scan parquet spark_catalog.default.web_sales +Output [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#110)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(75) ColumnarToRow [codegen id : 16] +Input [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] + +(76) Filter [codegen id : 16] +Input [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Condition : isnotnull(ws_bill_customer_sk#105) + +(77) BroadcastExchange +Input [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(78) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_customer_sk#97] +Right keys [1]: [ws_bill_customer_sk#105] +Join type: Inner +Join condition: None + +(79) Project [codegen id : 18] +Output [12]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Input [14]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] + +(80) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#111, d_year#112] + +(81) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#110] +Right keys [1]: [d_date_sk#111] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 18] +Output [12]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, d_year#112] +Input [14]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110, d_date_sk#111, d_year#112] + +(83) HashAggregate [codegen id : 18] +Input [12]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, d_year#112] +Keys [8]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112] +Functions [1]: [partial_sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))] +Aggregate Attributes [2]: [sum#113, isEmpty#114] +Results [10]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, sum#115, isEmpty#116] + +(84) Exchange +Input [10]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, sum#115, isEmpty#116] +Arguments: hashpartitioning(c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(85) HashAggregate [codegen id : 19] +Input [10]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, sum#115, isEmpty#116] +Keys [8]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112] +Functions [1]: [sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))] +Aggregate Attributes [1]: [sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))#117] +Results [2]: [c_customer_id#98 AS customer_id#118, sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))#117 AS year_total#119] + +(86) Filter [codegen id : 19] +Input [2]: [customer_id#118, year_total#119] +Condition : (isnotnull(year_total#119) AND (year_total#119 > 0.000000)) + +(87) BroadcastExchange +Input [2]: [customer_id#118, year_total#119] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=14] + +(88) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#118] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 24] +Output [11]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96, year_total#119] +Input [12]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96, customer_id#118, year_total#119] + +(90) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(91) ColumnarToRow [codegen id : 22] +Input [8]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127] + +(92) Filter [codegen id : 22] +Input [8]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127] +Condition : (isnotnull(c_customer_sk#120) AND isnotnull(c_customer_id#121)) + +(93) ReusedExchange [Reuses operator id: 77] +Output [6]: [ws_bill_customer_sk#128, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133] + +(94) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [c_customer_sk#120] +Right keys [1]: [ws_bill_customer_sk#128] +Join type: Inner +Join condition: None + +(95) Project [codegen id : 22] +Output [12]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133] +Input [14]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_bill_customer_sk#128, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133] + +(96) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#134, d_year#135] + +(97) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#133] +Right keys [1]: [d_date_sk#134] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 22] +Output [12]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, d_year#135] +Input [14]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133, d_date_sk#134, d_year#135] + +(99) HashAggregate [codegen id : 22] +Input [12]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, d_year#135] +Keys [8]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135] +Functions [1]: [partial_sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))] +Aggregate Attributes [2]: [sum#136, isEmpty#137] +Results [10]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, sum#138, isEmpty#139] + +(100) Exchange +Input [10]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, sum#138, isEmpty#139] +Arguments: hashpartitioning(c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(101) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, sum#138, isEmpty#139] +Keys [8]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135] +Functions [1]: [sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))] +Aggregate Attributes [1]: [sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))#117] +Results [2]: [c_customer_id#121 AS customer_id#140, sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))#117 AS year_total#141] + +(102) BroadcastExchange +Input [2]: [customer_id#140, year_total#141] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=16] + +(103) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#140] +Join type: Inner +Join condition: (CASE WHEN (year_total#74 > 0.000000) THEN (year_total#96 / year_total#74) END > CASE WHEN (year_total#119 > 0.000000) THEN (year_total#141 / year_total#119) END) + +(104) Project [codegen id : 24] +Output [7]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] +Input [13]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96, year_total#119, customer_id#140, year_total#141] + +(105) TakeOrderedAndProject +Input [7]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] +Arguments: 100, [customer_id#44 ASC NULLS FIRST, customer_first_name#45 ASC NULLS FIRST, customer_last_name#46 ASC NULLS FIRST, customer_preferred_cust_flag#47 ASC NULLS FIRST, customer_birth_country#48 ASC NULLS FIRST, customer_login#49 ASC NULLS FIRST, customer_email_address#50 ASC NULLS FIRST], [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1b4ee994f6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_datafusion/simplified.txt @@ -0,0 +1,156 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + WholeStageCodegen (24) + Project [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2)),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (19) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #13 + WholeStageCodegen (18) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #16 + WholeStageCodegen (22) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] #14 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b21f0f54f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/explain.txt @@ -0,0 +1,616 @@ +== Physical Plan == +TakeOrderedAndProject (105) ++- * Project (104) + +- * BroadcastHashJoin Inner BuildRight (103) + :- * Project (89) + : +- * BroadcastHashJoin Inner BuildRight (88) + : :- * Project (70) + : : +- * BroadcastHashJoin Inner BuildRight (69) + : : :- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * BroadcastHashJoin Inner BuildRight (36) + : : : : :- * Filter (19) + : : : : : +- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : : : +- BroadcastExchange (35) + : : : : +- * HashAggregate (34) + : : : : +- Exchange (33) + : : : : +- * HashAggregate (32) + : : : : +- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Project (25) + : : : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : : : :- * Filter (22) + : : : : : : +- * ColumnarToRow (21) + : : : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : : : +- ReusedExchange (23) + : : : : +- BroadcastExchange (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet spark_catalog.default.date_dim (26) + : : : +- BroadcastExchange (53) + : : : +- * Filter (52) + : : : +- * HashAggregate (51) + : : : +- Exchange (50) + : : : +- * HashAggregate (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet spark_catalog.default.customer (37) + : : : : +- BroadcastExchange (43) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (40) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (68) + : : +- * HashAggregate (67) + : : +- Exchange (66) + : : +- * HashAggregate (65) + : : +- * Project (64) + : : +- * BroadcastHashJoin Inner BuildRight (63) + : : :- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet spark_catalog.default.customer (56) + : : : +- ReusedExchange (59) + : : +- ReusedExchange (62) + : +- BroadcastExchange (87) + : +- * Filter (86) + : +- * HashAggregate (85) + : +- Exchange (84) + : +- * HashAggregate (83) + : +- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * Project (79) + : : +- * BroadcastHashJoin Inner BuildRight (78) + : : :- * Filter (73) + : : : +- * ColumnarToRow (72) + : : : +- Scan parquet spark_catalog.default.customer (71) + : : +- BroadcastExchange (77) + : : +- * Filter (76) + : : +- * ColumnarToRow (75) + : : +- Scan parquet spark_catalog.default.web_sales (74) + : +- ReusedExchange (80) + +- BroadcastExchange (102) + +- * HashAggregate (101) + +- Exchange (100) + +- * HashAggregate (99) + +- * Project (98) + +- * BroadcastHashJoin Inner BuildRight (97) + :- * Project (95) + : +- * BroadcastHashJoin Inner BuildRight (94) + : :- * Filter (92) + : : +- * ColumnarToRow (91) + : : +- Scan parquet spark_catalog.default.customer (90) + : +- ReusedExchange (93) + +- ReusedExchange (96) + + +(1) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#14)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(6) Filter [codegen id : 1] +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#9) + +(7) BroadcastExchange +Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#15, d_year#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#15, d_year#16] + +(16) HashAggregate [codegen id : 3] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [partial_sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] +Aggregate Attributes [2]: [sum#17, isEmpty#18] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] + +(17) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 24] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16, sum#19, isEmpty#20] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#16] +Functions [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))] +Aggregate Attributes [1]: [sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))#21] +Results [2]: [c_customer_id#2 AS customer_id#22, sum(((((ss_ext_list_price#13 - ss_ext_wholesale_cost#12) - ss_ext_discount_amt#10) + ss_ext_sales_price#11) / 2))#21 AS year_total#23] + +(19) Filter [codegen id : 24] +Input [2]: [customer_id#22, year_total#23] +Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.000000)) + +(20) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_customer_id#25)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [6]: [ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#24] +Right keys [1]: [ss_customer_sk#32] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_customer_sk#32, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#38, d_year#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#38, d_year#39] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#38, d_year#39] +Condition : ((isnotnull(d_year#39) AND (d_year#39 = 2002)) AND isnotnull(d_date_sk#38)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#38, d_year#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] +Input [14]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, ss_sold_date_sk#37, d_date_sk#38, d_year#39] + +(32) HashAggregate [codegen id : 6] +Input [12]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, ss_ext_discount_amt#33, ss_ext_sales_price#34, ss_ext_wholesale_cost#35, ss_ext_list_price#36, d_year#39] +Keys [8]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39] +Functions [1]: [partial_sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] + +(33) Exchange +Input [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] +Arguments: hashpartitioning(c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [10]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39, sum#42, isEmpty#43] +Keys [8]: [c_customer_id#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28, c_birth_country#29, c_login#30, c_email_address#31, d_year#39] +Functions [1]: [sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))] +Aggregate Attributes [1]: [sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))#21] +Results [8]: [c_customer_id#25 AS customer_id#44, c_first_name#26 AS customer_first_name#45, c_last_name#27 AS customer_last_name#46, c_preferred_cust_flag#28 AS customer_preferred_cust_flag#47, c_birth_country#29 AS customer_birth_country#48, c_login#30 AS customer_login#49, c_email_address#31 AS customer_email_address#50, sum(((((ss_ext_list_price#36 - ss_ext_wholesale_cost#35) - ss_ext_discount_amt#33) + ss_ext_sales_price#34) / 2))#21 AS year_total#51] + +(35) BroadcastExchange +Input [8]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#44] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59] +Condition : (isnotnull(c_customer_sk#52) AND isnotnull(c_customer_id#53)) + +(40) Scan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#65)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] + +(42) Filter [codegen id : 8] +Input [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Condition : isnotnull(cs_bill_customer_sk#60) + +(43) BroadcastExchange +Input [6]: [cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#52] +Right keys [1]: [cs_bill_customer_sk#60] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [12]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] +Input [14]: [c_customer_sk#52, c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_bill_customer_sk#60, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#66, d_year#67] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [12]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, d_year#67] +Input [14]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, cs_sold_date_sk#65, d_date_sk#66, d_year#67] + +(49) HashAggregate [codegen id : 10] +Input [12]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, cs_ext_discount_amt#61, cs_ext_sales_price#62, cs_ext_wholesale_cost#63, cs_ext_list_price#64, d_year#67] +Keys [8]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67] +Functions [1]: [partial_sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))] +Aggregate Attributes [2]: [sum#68, isEmpty#69] +Results [10]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, sum#70, isEmpty#71] + +(50) Exchange +Input [10]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, sum#70, isEmpty#71] +Arguments: hashpartitioning(c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [10]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67, sum#70, isEmpty#71] +Keys [8]: [c_customer_id#53, c_first_name#54, c_last_name#55, c_preferred_cust_flag#56, c_birth_country#57, c_login#58, c_email_address#59, d_year#67] +Functions [1]: [sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))] +Aggregate Attributes [1]: [sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))#72] +Results [2]: [c_customer_id#53 AS customer_id#73, sum(((((cs_ext_list_price#64 - cs_ext_wholesale_cost#63) - cs_ext_discount_amt#61) + cs_ext_sales_price#62) / 2))#72 AS year_total#74] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#73, year_total#74] +Condition : (isnotnull(year_total#74) AND (year_total#74 > 0.000000)) + +(53) BroadcastExchange +Input [2]: [customer_id#73, year_total#74] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#73] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 24] +Output [11]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, year_total#74] +Input [12]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, customer_id#73, year_total#74] + +(56) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82] + +(58) Filter [codegen id : 14] +Input [8]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82] +Condition : (isnotnull(c_customer_sk#75) AND isnotnull(c_customer_id#76)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [6]: [cs_bill_customer_sk#83, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#75] +Right keys [1]: [cs_bill_customer_sk#83] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [12]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88] +Input [14]: [c_customer_sk#75, c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_bill_customer_sk#83, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#89, d_year#90] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [cs_sold_date_sk#88] +Right keys [1]: [d_date_sk#89] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [12]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, d_year#90] +Input [14]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, cs_sold_date_sk#88, d_date_sk#89, d_year#90] + +(65) HashAggregate [codegen id : 14] +Input [12]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, cs_ext_discount_amt#84, cs_ext_sales_price#85, cs_ext_wholesale_cost#86, cs_ext_list_price#87, d_year#90] +Keys [8]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90] +Functions [1]: [partial_sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))] +Aggregate Attributes [2]: [sum#91, isEmpty#92] +Results [10]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, sum#93, isEmpty#94] + +(66) Exchange +Input [10]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, sum#93, isEmpty#94] +Arguments: hashpartitioning(c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [10]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90, sum#93, isEmpty#94] +Keys [8]: [c_customer_id#76, c_first_name#77, c_last_name#78, c_preferred_cust_flag#79, c_birth_country#80, c_login#81, c_email_address#82, d_year#90] +Functions [1]: [sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))] +Aggregate Attributes [1]: [sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))#72] +Results [2]: [c_customer_id#76 AS customer_id#95, sum(((((cs_ext_list_price#87 - cs_ext_wholesale_cost#86) - cs_ext_discount_amt#84) + cs_ext_sales_price#85) / 2))#72 AS year_total#96] + +(68) BroadcastExchange +Input [2]: [customer_id#95, year_total#96] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#95] +Join type: Inner +Join condition: (CASE WHEN (year_total#74 > 0.000000) THEN (year_total#96 / year_total#74) END > CASE WHEN (year_total#23 > 0.000000) THEN (year_total#51 / year_total#23) END) + +(70) Project [codegen id : 24] +Output [10]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96] +Input [13]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#51, year_total#74, customer_id#95, year_total#96] + +(71) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 18] +Input [8]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104] + +(73) Filter [codegen id : 18] +Input [8]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104] +Condition : (isnotnull(c_customer_sk#97) AND isnotnull(c_customer_id#98)) + +(74) Scan parquet spark_catalog.default.web_sales +Output [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#110)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(75) ColumnarToRow [codegen id : 16] +Input [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] + +(76) Filter [codegen id : 16] +Input [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Condition : isnotnull(ws_bill_customer_sk#105) + +(77) BroadcastExchange +Input [6]: [ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(78) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_customer_sk#97] +Right keys [1]: [ws_bill_customer_sk#105] +Join type: Inner +Join condition: None + +(79) Project [codegen id : 18] +Output [12]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] +Input [14]: [c_customer_sk#97, c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_bill_customer_sk#105, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110] + +(80) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#111, d_year#112] + +(81) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#110] +Right keys [1]: [d_date_sk#111] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 18] +Output [12]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, d_year#112] +Input [14]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, ws_sold_date_sk#110, d_date_sk#111, d_year#112] + +(83) HashAggregate [codegen id : 18] +Input [12]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, ws_ext_discount_amt#106, ws_ext_sales_price#107, ws_ext_wholesale_cost#108, ws_ext_list_price#109, d_year#112] +Keys [8]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112] +Functions [1]: [partial_sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))] +Aggregate Attributes [2]: [sum#113, isEmpty#114] +Results [10]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, sum#115, isEmpty#116] + +(84) Exchange +Input [10]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, sum#115, isEmpty#116] +Arguments: hashpartitioning(c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(85) HashAggregate [codegen id : 19] +Input [10]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112, sum#115, isEmpty#116] +Keys [8]: [c_customer_id#98, c_first_name#99, c_last_name#100, c_preferred_cust_flag#101, c_birth_country#102, c_login#103, c_email_address#104, d_year#112] +Functions [1]: [sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))] +Aggregate Attributes [1]: [sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))#117] +Results [2]: [c_customer_id#98 AS customer_id#118, sum(((((ws_ext_list_price#109 - ws_ext_wholesale_cost#108) - ws_ext_discount_amt#106) + ws_ext_sales_price#107) / 2))#117 AS year_total#119] + +(86) Filter [codegen id : 19] +Input [2]: [customer_id#118, year_total#119] +Condition : (isnotnull(year_total#119) AND (year_total#119 > 0.000000)) + +(87) BroadcastExchange +Input [2]: [customer_id#118, year_total#119] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=14] + +(88) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#118] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 24] +Output [11]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96, year_total#119] +Input [12]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96, customer_id#118, year_total#119] + +(90) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(91) ColumnarToRow [codegen id : 22] +Input [8]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127] + +(92) Filter [codegen id : 22] +Input [8]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127] +Condition : (isnotnull(c_customer_sk#120) AND isnotnull(c_customer_id#121)) + +(93) ReusedExchange [Reuses operator id: 77] +Output [6]: [ws_bill_customer_sk#128, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133] + +(94) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [c_customer_sk#120] +Right keys [1]: [ws_bill_customer_sk#128] +Join type: Inner +Join condition: None + +(95) Project [codegen id : 22] +Output [12]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133] +Input [14]: [c_customer_sk#120, c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_bill_customer_sk#128, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133] + +(96) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#134, d_year#135] + +(97) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#133] +Right keys [1]: [d_date_sk#134] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 22] +Output [12]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, d_year#135] +Input [14]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, ws_sold_date_sk#133, d_date_sk#134, d_year#135] + +(99) HashAggregate [codegen id : 22] +Input [12]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, ws_ext_discount_amt#129, ws_ext_sales_price#130, ws_ext_wholesale_cost#131, ws_ext_list_price#132, d_year#135] +Keys [8]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135] +Functions [1]: [partial_sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))] +Aggregate Attributes [2]: [sum#136, isEmpty#137] +Results [10]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, sum#138, isEmpty#139] + +(100) Exchange +Input [10]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, sum#138, isEmpty#139] +Arguments: hashpartitioning(c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(101) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135, sum#138, isEmpty#139] +Keys [8]: [c_customer_id#121, c_first_name#122, c_last_name#123, c_preferred_cust_flag#124, c_birth_country#125, c_login#126, c_email_address#127, d_year#135] +Functions [1]: [sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))] +Aggregate Attributes [1]: [sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))#117] +Results [2]: [c_customer_id#121 AS customer_id#140, sum(((((ws_ext_list_price#132 - ws_ext_wholesale_cost#131) - ws_ext_discount_amt#129) + ws_ext_sales_price#130) / 2))#117 AS year_total#141] + +(102) BroadcastExchange +Input [2]: [customer_id#140, year_total#141] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=16] + +(103) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#22] +Right keys [1]: [customer_id#140] +Join type: Inner +Join condition: (CASE WHEN (year_total#74 > 0.000000) THEN (year_total#96 / year_total#74) END > CASE WHEN (year_total#119 > 0.000000) THEN (year_total#141 / year_total#119) END) + +(104) Project [codegen id : 24] +Output [7]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] +Input [13]: [customer_id#22, customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#74, year_total#96, year_total#119, customer_id#140, year_total#141] + +(105) TakeOrderedAndProject +Input [7]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] +Arguments: 100, [customer_id#44 ASC NULLS FIRST, customer_first_name#45 ASC NULLS FIRST, customer_last_name#46 ASC NULLS FIRST, customer_preferred_cust_flag#47 ASC NULLS FIRST, customer_birth_country#48 ASC NULLS FIRST, customer_login#49 ASC NULLS FIRST, customer_email_address#50 ASC NULLS FIRST], [customer_id#44, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1b4ee994f6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.native_iceberg_compat/simplified.txt @@ -0,0 +1,156 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + WholeStageCodegen (24) + Project [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2)),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,cs_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (19) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #13 + WholeStageCodegen (18) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (23) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #16 + WholeStageCodegen (22) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,ws_sold_date_sk] #14 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/explain.txt new file mode 100644 index 0000000000..32718e056b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Project (13) + : : : +- * SortMergeJoin LeftOuter (12) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.catalog_returns (6) + : : +- BroadcastExchange (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet spark_catalog.default.warehouse (14) + : +- BroadcastExchange (24) + : +- * Project (23) + : +- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet spark_catalog.default.item (20) + +- BroadcastExchange (30) + +- * Filter (29) + +- * ColumnarToRow (28) + +- Scan parquet spark_catalog.default.date_dim (27) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 1] +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) + +(4) Exchange +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] + +(8) Filter [codegen id : 3] +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(9) Project [codegen id : 3] +Output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] + +(10) Exchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: hashpartitioning(cr_order_number#7, cr_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cr_order_number#7 ASC NULLS FIRST, cr_item_sk#6 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 8] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#7, cr_item_sk#6] +Join type: LeftOuter +Join condition: None + +(13) Project [codegen id : 8] +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(14) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#10, w_state#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [w_warehouse_sk#10, w_state#11] + +(16) Filter [codegen id : 5] +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(17) BroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(18) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_warehouse_sk#1] +Right keys [1]: [w_warehouse_sk#10] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 8] +Output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] + +(20) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] + +(22) Filter [codegen id : 6] +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Condition : (((isnotnull(i_current_price#14) AND (i_current_price#14 >= 0.99)) AND (i_current_price#14 <= 1.49)) AND isnotnull(i_item_sk#12)) + +(23) Project [codegen id : 6] +Output [2]: [i_item_sk#12, i_item_id#13] +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] + +(24) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 8] +Output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] +Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_sk#12, i_item_id#13] + +(27) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#15, d_date#16] + +(29) Filter [codegen id : 7] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-02-10)) AND (d_date#16 <= 2000-04-10)) AND isnotnull(d_date_sk#15)) + +(30) BroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date_sk#15, d_date#16] + +(33) HashAggregate [codegen id : 8] +Input [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [partial_sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] + +(34) Exchange +Input [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] +Arguments: hashpartitioning(w_state#11, i_item_id#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) HashAggregate [codegen id : 9] +Input [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#25, sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#26] +Results [4]: [w_state#11, i_item_id#13, sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#25 AS sales_before#27, sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#26 AS sales_after#28] + +(36) TakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#13, sales_before#27, sales_after#28] +Arguments: 100, [w_state#11 ASC NULLS FIRST, i_item_id#13 ASC NULLS FIRST], [w_state#11, i_item_id#13, sales_before#27, sales_after#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6645ee24b7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_datafusion/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + WholeStageCodegen (9) + HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_state,i_item_id] #1 + WholeStageCodegen (8) + HashAggregate [w_state,i_item_id,d_date,cs_sales_price,cr_refunded_cash] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #2 + WholeStageCodegen (1) + Filter [cs_warehouse_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #3 + WholeStageCodegen (3) + Project [cr_item_sk,cr_order_number,cr_refunded_cash] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..32718e056b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Project (13) + : : : +- * SortMergeJoin LeftOuter (12) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.catalog_returns (6) + : : +- BroadcastExchange (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet spark_catalog.default.warehouse (14) + : +- BroadcastExchange (24) + : +- * Project (23) + : +- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet spark_catalog.default.item (20) + +- BroadcastExchange (30) + +- * Filter (29) + +- * ColumnarToRow (28) + +- Scan parquet spark_catalog.default.date_dim (27) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 1] +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) + +(4) Exchange +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] + +(8) Filter [codegen id : 3] +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(9) Project [codegen id : 3] +Output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Input [4]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8, cr_returned_date_sk#9] + +(10) Exchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: hashpartitioning(cr_order_number#7, cr_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: [cr_order_number#7 ASC NULLS FIRST, cr_item_sk#6 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 8] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#7, cr_item_sk#6] +Join type: LeftOuter +Join condition: None + +(13) Project [codegen id : 8] +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8] +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(14) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#10, w_state#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [w_warehouse_sk#10, w_state#11] + +(16) Filter [codegen id : 5] +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(17) BroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(18) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_warehouse_sk#1] +Right keys [1]: [w_warehouse_sk#10] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 8] +Output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11] +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] + +(20) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] + +(22) Filter [codegen id : 6] +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] +Condition : (((isnotnull(i_current_price#14) AND (i_current_price#14 >= 0.99)) AND (i_current_price#14 <= 1.49)) AND isnotnull(i_item_sk#12)) + +(23) Project [codegen id : 6] +Output [2]: [i_item_sk#12, i_item_id#13] +Input [3]: [i_item_sk#12, i_item_id#13, i_current_price#14] + +(24) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 8] +Output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13] +Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_sk#12, i_item_id#13] + +(27) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#15, d_date#16] + +(29) Filter [codegen id : 7] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-02-10)) AND (d_date#16 <= 2000-04-10)) AND isnotnull(d_date_sk#15)) + +(30) BroadcastExchange +Input [2]: [d_date_sk#15, d_date#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date_sk#15, d_date#16] + +(33) HashAggregate [codegen id : 8] +Input [5]: [cs_sales_price#4, cr_refunded_cash#8, w_state#11, i_item_id#13, d_date#16] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [partial_sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] + +(34) Exchange +Input [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] +Arguments: hashpartitioning(w_state#11, i_item_id#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) HashAggregate [codegen id : 9] +Input [6]: [w_state#11, i_item_id#13, sum#21, isEmpty#22, sum#23, isEmpty#24] +Keys [2]: [w_state#11, i_item_id#13] +Functions [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END), sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#25, sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#26] +Results [4]: [w_state#11, i_item_id#13, sum(CASE WHEN (d_date#16 < 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#25 AS sales_before#27, sum(CASE WHEN (d_date#16 >= 2000-03-11) THEN (cs_sales_price#4 - coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00)) ELSE 0.00 END)#26 AS sales_after#28] + +(36) TakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#13, sales_before#27, sales_after#28] +Arguments: 100, [w_state#11 ASC NULLS FIRST, i_item_id#13 ASC NULLS FIRST], [w_state#11, i_item_id#13, sales_before#27, sales_after#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6645ee24b7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.native_iceberg_compat/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + WholeStageCodegen (9) + HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 2000-03-11) THEN (cs_sales_price - coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00)) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_state,i_item_id] #1 + WholeStageCodegen (8) + HashAggregate [w_state,i_item_id,d_date,cs_sales_price,cr_refunded_cash] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash,w_state] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_warehouse_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk,cr_refunded_cash] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #2 + WholeStageCodegen (1) + Filter [cs_warehouse_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #3 + WholeStageCodegen (3) + Project [cr_item_sk,cr_order_number,cr_refunded_cash] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/explain.txt new file mode 100644 index 0000000000..71cad8f2f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/explain.txt @@ -0,0 +1,121 @@ +== Physical Plan == +TakeOrderedAndProject (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- Scan parquet spark_catalog.default.item (1) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet spark_catalog.default.item (5) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(4) Project [codegen id : 3] +Output [2]: [i_manufact#2, i_product_name#3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(5) Scan parquet spark_catalog.default.item +Output [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(Or(And(EqualTo(i_category,Women ),Or(And(And(Or(EqualTo(i_color,powder ),EqualTo(i_color,khaki )),Or(EqualTo(i_units,Ounce ),EqualTo(i_units,Oz ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))),And(And(Or(EqualTo(i_color,brown ),EqualTo(i_color,honeydew )),Or(EqualTo(i_units,Bunch ),EqualTo(i_units,Ton ))),Or(EqualTo(i_size,N/A ),EqualTo(i_size,small ))))),And(EqualTo(i_category,Men ),Or(And(And(Or(EqualTo(i_color,floral ),EqualTo(i_color,deep )),Or(EqualTo(i_units,N/A ),EqualTo(i_units,Dozen ))),Or(EqualTo(i_size,petite ),EqualTo(i_size,large ))),And(And(Or(EqualTo(i_color,light ),EqualTo(i_color,cornflower )),Or(EqualTo(i_units,Box ),EqualTo(i_units,Pound ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large )))))),Or(And(EqualTo(i_category,Women ),Or(And(And(Or(EqualTo(i_color,midnight ),EqualTo(i_color,snow )),Or(EqualTo(i_units,Pallet ),EqualTo(i_units,Gross ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))),And(And(Or(EqualTo(i_color,cyan ),EqualTo(i_color,papaya )),Or(EqualTo(i_units,Cup ),EqualTo(i_units,Dram ))),Or(EqualTo(i_size,N/A ),EqualTo(i_size,small ))))),And(EqualTo(i_category,Men ),Or(And(And(Or(EqualTo(i_color,orange ),EqualTo(i_color,frosted )),Or(EqualTo(i_units,Each ),EqualTo(i_units,Tbl ))),Or(EqualTo(i_size,petite ),EqualTo(i_size,large ))),And(And(Or(EqualTo(i_color,forest ),EqualTo(i_color,ghost )),Or(EqualTo(i_units,Lb ),EqualTo(i_units,Bundle ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))))))), IsNotNull(i_manufact)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] + +(7) Filter [codegen id : 1] +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Condition : (((((i_category#4 = Women ) AND (((((i_color#7 = powder ) OR (i_color#7 = khaki )) AND ((i_units#8 = Ounce ) OR (i_units#8 = Oz ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = brown ) OR (i_color#7 = honeydew )) AND ((i_units#8 = Bunch ) OR (i_units#8 = Ton ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = floral ) OR (i_color#7 = deep )) AND ((i_units#8 = N/A ) OR (i_units#8 = Dozen ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = light ) OR (i_color#7 = cornflower )) AND ((i_units#8 = Box ) OR (i_units#8 = Pound ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large )))))) OR (((i_category#4 = Women ) AND (((((i_color#7 = midnight ) OR (i_color#7 = snow )) AND ((i_units#8 = Pallet ) OR (i_units#8 = Gross ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = cyan ) OR (i_color#7 = papaya )) AND ((i_units#8 = Cup ) OR (i_units#8 = Dram ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = orange ) OR (i_color#7 = frosted )) AND ((i_units#8 = Each ) OR (i_units#8 = Tbl ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = forest ) OR (i_color#7 = ghost )) AND ((i_units#8 = Lb ) OR (i_units#8 = Bundle ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))))))) AND isnotnull(i_manufact#5)) + +(8) Project [codegen id : 1] +Output [1]: [i_manufact#5] +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] + +(9) HashAggregate [codegen id : 1] +Input [1]: [i_manufact#5] +Keys [1]: [i_manufact#5] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#9] +Results [2]: [i_manufact#5, count#10] + +(10) Exchange +Input [2]: [i_manufact#5, count#10] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(11) HashAggregate [codegen id : 2] +Input [2]: [i_manufact#5, count#10] +Keys [1]: [i_manufact#5] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#11] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#5] + +(12) Filter [codegen id : 2] +Input [2]: [item_cnt#12, i_manufact#5] +Condition : (item_cnt#12 > 0) + +(13) Project [codegen id : 2] +Output [1]: [i_manufact#5] +Input [2]: [item_cnt#12, i_manufact#5] + +(14) BroadcastExchange +Input [1]: [i_manufact#5] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_manufact#2] +Right keys [1]: [i_manufact#5] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [1]: [i_product_name#3] +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#5] + +(17) HashAggregate [codegen id : 3] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(18) Exchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(20) TakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e287c0fd05 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_datafusion/simplified.txt @@ -0,0 +1,29 @@ +TakeOrderedAndProject [i_product_name] + WholeStageCodegen (4) + HashAggregate [i_product_name] + InputAdapter + Exchange [i_product_name] #1 + WholeStageCodegen (3) + HashAggregate [i_product_name] + Project [i_product_name] + BroadcastHashJoin [i_manufact,i_manufact] + Project [i_manufact,i_product_name] + Filter [i_manufact_id,i_manufact] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_manufact_id,i_manufact,i_product_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [i_manufact] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,count] + InputAdapter + Exchange [i_manufact] #3 + WholeStageCodegen (1) + HashAggregate [i_manufact] [count,count] + Project [i_manufact] + Filter [i_category,i_color,i_units,i_size,i_manufact] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_category,i_manufact,i_size,i_color,i_units] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..71cad8f2f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/explain.txt @@ -0,0 +1,121 @@ +== Physical Plan == +TakeOrderedAndProject (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- Scan parquet spark_catalog.default.item (1) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet spark_catalog.default.item (5) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(4) Project [codegen id : 3] +Output [2]: [i_manufact#2, i_product_name#3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(5) Scan parquet spark_catalog.default.item +Output [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(Or(And(EqualTo(i_category,Women ),Or(And(And(Or(EqualTo(i_color,powder ),EqualTo(i_color,khaki )),Or(EqualTo(i_units,Ounce ),EqualTo(i_units,Oz ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))),And(And(Or(EqualTo(i_color,brown ),EqualTo(i_color,honeydew )),Or(EqualTo(i_units,Bunch ),EqualTo(i_units,Ton ))),Or(EqualTo(i_size,N/A ),EqualTo(i_size,small ))))),And(EqualTo(i_category,Men ),Or(And(And(Or(EqualTo(i_color,floral ),EqualTo(i_color,deep )),Or(EqualTo(i_units,N/A ),EqualTo(i_units,Dozen ))),Or(EqualTo(i_size,petite ),EqualTo(i_size,large ))),And(And(Or(EqualTo(i_color,light ),EqualTo(i_color,cornflower )),Or(EqualTo(i_units,Box ),EqualTo(i_units,Pound ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large )))))),Or(And(EqualTo(i_category,Women ),Or(And(And(Or(EqualTo(i_color,midnight ),EqualTo(i_color,snow )),Or(EqualTo(i_units,Pallet ),EqualTo(i_units,Gross ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))),And(And(Or(EqualTo(i_color,cyan ),EqualTo(i_color,papaya )),Or(EqualTo(i_units,Cup ),EqualTo(i_units,Dram ))),Or(EqualTo(i_size,N/A ),EqualTo(i_size,small ))))),And(EqualTo(i_category,Men ),Or(And(And(Or(EqualTo(i_color,orange ),EqualTo(i_color,frosted )),Or(EqualTo(i_units,Each ),EqualTo(i_units,Tbl ))),Or(EqualTo(i_size,petite ),EqualTo(i_size,large ))),And(And(Or(EqualTo(i_color,forest ),EqualTo(i_color,ghost )),Or(EqualTo(i_units,Lb ),EqualTo(i_units,Bundle ))),Or(EqualTo(i_size,medium ),EqualTo(i_size,extra large ))))))), IsNotNull(i_manufact)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] + +(7) Filter [codegen id : 1] +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] +Condition : (((((i_category#4 = Women ) AND (((((i_color#7 = powder ) OR (i_color#7 = khaki )) AND ((i_units#8 = Ounce ) OR (i_units#8 = Oz ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = brown ) OR (i_color#7 = honeydew )) AND ((i_units#8 = Bunch ) OR (i_units#8 = Ton ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = floral ) OR (i_color#7 = deep )) AND ((i_units#8 = N/A ) OR (i_units#8 = Dozen ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = light ) OR (i_color#7 = cornflower )) AND ((i_units#8 = Box ) OR (i_units#8 = Pound ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large )))))) OR (((i_category#4 = Women ) AND (((((i_color#7 = midnight ) OR (i_color#7 = snow )) AND ((i_units#8 = Pallet ) OR (i_units#8 = Gross ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))) OR ((((i_color#7 = cyan ) OR (i_color#7 = papaya )) AND ((i_units#8 = Cup ) OR (i_units#8 = Dram ))) AND ((i_size#6 = N/A ) OR (i_size#6 = small ))))) OR ((i_category#4 = Men ) AND (((((i_color#7 = orange ) OR (i_color#7 = frosted )) AND ((i_units#8 = Each ) OR (i_units#8 = Tbl ))) AND ((i_size#6 = petite ) OR (i_size#6 = large ))) OR ((((i_color#7 = forest ) OR (i_color#7 = ghost )) AND ((i_units#8 = Lb ) OR (i_units#8 = Bundle ))) AND ((i_size#6 = medium ) OR (i_size#6 = extra large ))))))) AND isnotnull(i_manufact#5)) + +(8) Project [codegen id : 1] +Output [1]: [i_manufact#5] +Input [5]: [i_category#4, i_manufact#5, i_size#6, i_color#7, i_units#8] + +(9) HashAggregate [codegen id : 1] +Input [1]: [i_manufact#5] +Keys [1]: [i_manufact#5] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#9] +Results [2]: [i_manufact#5, count#10] + +(10) Exchange +Input [2]: [i_manufact#5, count#10] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(11) HashAggregate [codegen id : 2] +Input [2]: [i_manufact#5, count#10] +Keys [1]: [i_manufact#5] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#11] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#5] + +(12) Filter [codegen id : 2] +Input [2]: [item_cnt#12, i_manufact#5] +Condition : (item_cnt#12 > 0) + +(13) Project [codegen id : 2] +Output [1]: [i_manufact#5] +Input [2]: [item_cnt#12, i_manufact#5] + +(14) BroadcastExchange +Input [1]: [i_manufact#5] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_manufact#2] +Right keys [1]: [i_manufact#5] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [1]: [i_product_name#3] +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#5] + +(17) HashAggregate [codegen id : 3] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(18) Exchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(20) TakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..e287c0fd05 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.native_iceberg_compat/simplified.txt @@ -0,0 +1,29 @@ +TakeOrderedAndProject [i_product_name] + WholeStageCodegen (4) + HashAggregate [i_product_name] + InputAdapter + Exchange [i_product_name] #1 + WholeStageCodegen (3) + HashAggregate [i_product_name] + Project [i_product_name] + BroadcastHashJoin [i_manufact,i_manufact] + Project [i_manufact,i_product_name] + Filter [i_manufact_id,i_manufact] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_manufact_id,i_manufact,i_product_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [i_manufact] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,count] + InputAdapter + Exchange [i_manufact] #3 + WholeStageCodegen (1) + HashAggregate [i_manufact] [count,count] + Project [i_manufact] + Filter [i_category,i_color,i_units,i_size,i_manufact] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_category,i_manufact,i_size,i_color,i_units] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/explain.txt new file mode 100644 index 0000000000..327c17b215 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] + +(19) Exchange +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_category_id#8, i_category#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum(ss_ext_sales_price)#14] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] +Arguments: 100, [sum(ss_ext_sales_price)#14 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/simplified.txt new file mode 100644 index 0000000000..aaf2b9dc2f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] + WholeStageCodegen (4) + HashAggregate [d_year,i_category_id,i_category,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + InputAdapter + Exchange [d_year,i_category_id,i_category] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category_id,i_category,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_category_id,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_category_id,i_category] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..327c17b215 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] + +(19) Exchange +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_category_id#8, i_category#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum(ss_ext_sales_price)#14] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] +Arguments: 100, [sum(ss_ext_sales_price)#14 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..aaf2b9dc2f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] + WholeStageCodegen (4) + HashAggregate [d_year,i_category_id,i_category,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + InputAdapter + Exchange [d_year,i_category_id,i_category] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category_id,i_category,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_category_id,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_category_id,i_category] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/explain.txt new file mode 100644 index 0000000000..b21f03173d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.store (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_day_name#3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] + +(15) BroadcastExchange +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Results [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] + +(19) Exchange +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31] +Results [9]: [s_store_name#9, s_store_id#8, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25,17,2) AS sun_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26,17,2) AS mon_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27,17,2) AS tue_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28,17,2) AS wed_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29,17,2) AS thu_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30,17,2) AS fri_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31,17,2) AS sat_sales#38] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#32 ASC NULLS FIRST, mon_sales#33 ASC NULLS FIRST, tue_sales#34 ASC NULLS FIRST, wed_sales#35 ASC NULLS FIRST, thu_sales#36 ASC NULLS FIRST, fri_sales#37 ASC NULLS FIRST, sat_sales#38 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3e73642c4f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + WholeStageCodegen (4) + HashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_name,s_store_id] #1 + WholeStageCodegen (3) + HashAggregate [s_store_name,s_store_id,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,ss_sales_price,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_day_name,ss_store_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_day_name] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_day_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b21f03173d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.store (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_day_name#3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] + +(15) BroadcastExchange +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Results [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] + +(19) Exchange +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [2]: [s_store_name#9, s_store_id#8] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31] +Results [9]: [s_store_name#9, s_store_id#8, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25,17,2) AS sun_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26,17,2) AS mon_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27,17,2) AS tue_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28,17,2) AS wed_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29,17,2) AS thu_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30,17,2) AS fri_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31,17,2) AS sat_sales#38] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#32 ASC NULLS FIRST, mon_sales#33 ASC NULLS FIRST, tue_sales#34 ASC NULLS FIRST, wed_sales#35 ASC NULLS FIRST, thu_sales#36 ASC NULLS FIRST, fri_sales#37 ASC NULLS FIRST, sat_sales#38 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3e73642c4f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + WholeStageCodegen (4) + HashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_name,s_store_id] #1 + WholeStageCodegen (3) + HashAggregate [s_store_name,s_store_id,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,ss_sales_price,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_day_name,ss_store_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_day_name] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_day_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/explain.txt new file mode 100644 index 0000000000..4eda34a7e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/explain.txt @@ -0,0 +1,226 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * BroadcastHashJoin Inner BuildRight (30) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * SortMergeJoin Inner (21) + : : :- * Sort (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- Window (11) + : : : +- * Sort (10) + : : : +- Exchange (9) + : : : +- * Filter (8) + : : : +- * HashAggregate (7) + : : : +- Exchange (6) + : : : +- * HashAggregate (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- * Sort (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- Window (17) + : : +- * Sort (16) + : : +- ReusedExchange (15) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- ReusedExchange (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(4) Project [codegen id : 1] +Output [2]: [ss_item_sk#1, ss_net_profit#3] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] + +(5) HashAggregate [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#5, count#6] +Results [3]: [ss_item_sk#1, sum#7, count#8] + +(6) Exchange +Input [3]: [ss_item_sk#1, sum#7, count#8] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(7) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#1, sum#7, count#8] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [2]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11] + +(8) Filter [codegen id : 2] +Input [2]: [item_sk#10, rank_col#11] +Condition : (isnotnull(rank_col#11) AND (cast(rank_col#11 as decimal(13,7)) > (0.9 * Subquery scalar-subquery#12, [id=#13]))) + +(9) Exchange +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) Sort [codegen id : 3] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(11) Window +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#14], [rank_col#11 ASC NULLS FIRST] + +(12) Filter [codegen id : 4] +Input [3]: [item_sk#10, rank_col#11, rnk#14] +Condition : ((rnk#14 < 11) AND isnotnull(item_sk#10)) + +(13) Project [codegen id : 4] +Output [2]: [item_sk#10, rnk#14] +Input [3]: [item_sk#10, rank_col#11, rnk#14] + +(14) Sort [codegen id : 4] +Input [2]: [item_sk#10, rnk#14] +Arguments: [rnk#14 ASC NULLS FIRST], false, 0 + +(15) ReusedExchange [Reuses operator id: 9] +Output [2]: [item_sk#15, rank_col#16] + +(16) Sort [codegen id : 7] +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank_col#16 DESC NULLS LAST], false, 0 + +(17) Window +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank(rank_col#16) windowspecdefinition(rank_col#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#17], [rank_col#16 DESC NULLS LAST] + +(18) Filter [codegen id : 8] +Input [3]: [item_sk#15, rank_col#16, rnk#17] +Condition : ((rnk#17 < 11) AND isnotnull(item_sk#15)) + +(19) Project [codegen id : 8] +Output [2]: [item_sk#15, rnk#17] +Input [3]: [item_sk#15, rank_col#16, rnk#17] + +(20) Sort [codegen id : 8] +Input [2]: [item_sk#15, rnk#17] +Arguments: [rnk#17 ASC NULLS FIRST], false, 0 + +(21) SortMergeJoin [codegen id : 11] +Left keys [1]: [rnk#14] +Right keys [1]: [rnk#17] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 11] +Output [3]: [item_sk#10, rnk#14, item_sk#15] +Input [4]: [item_sk#10, rnk#14, item_sk#15, rnk#17] + +(23) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#18, i_product_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 9] +Input [2]: [i_item_sk#18, i_product_name#19] + +(25) Filter [codegen id : 9] +Input [2]: [i_item_sk#18, i_product_name#19] +Condition : isnotnull(i_item_sk#18) + +(26) BroadcastExchange +Input [2]: [i_item_sk#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(27) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#18] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 11] +Output [3]: [rnk#14, item_sk#15, i_product_name#19] +Input [5]: [item_sk#10, rnk#14, item_sk#15, i_item_sk#18, i_product_name#19] + +(29) ReusedExchange [Reuses operator id: 26] +Output [2]: [i_item_sk#20, i_product_name#21] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [item_sk#15] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 11] +Output [3]: [rnk#14, i_product_name#19 AS best_performing#22, i_product_name#21 AS worst_performing#23] +Input [5]: [rnk#14, item_sk#15, i_product_name#19, i_item_sk#20, i_product_name#21] + +(32) TakeOrderedAndProject +Input [3]: [rnk#14, best_performing#22, worst_performing#23] +Arguments: 100, [rnk#14 ASC NULLS FIRST], [rnk#14, best_performing#22, worst_performing#23] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#12, [id=#13] +* HashAggregate (39) ++- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * ColumnarToRow (34) + +- Scan parquet spark_catalog.default.store_sales (33) + + +(33) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 1] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] + +(35) Filter [codegen id : 1] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] +Condition : ((isnotnull(ss_store_sk#25) AND (ss_store_sk#25 = 4)) AND isnull(ss_addr_sk#24)) + +(36) Project [codegen id : 1] +Output [2]: [ss_store_sk#25, ss_net_profit#26] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] + +(37) HashAggregate [codegen id : 1] +Input [2]: [ss_store_sk#25, ss_net_profit#26] +Keys [1]: [ss_store_sk#25] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [2]: [sum#28, count#29] +Results [3]: [ss_store_sk#25, sum#30, count#31] + +(38) Exchange +Input [3]: [ss_store_sk#25, sum#30, count#31] +Arguments: hashpartitioning(ss_store_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(39) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#25, sum#30, count#31] +Keys [1]: [ss_store_sk#25] +Functions [1]: [avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#26))#32] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#26))#32 / 100.0) as decimal(11,6)) AS rank_col#33] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/simplified.txt new file mode 100644 index 0000000000..46d14b650b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen (11) + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + SortMergeJoin [rnk,rnk] + InputAdapter + WholeStageCodegen (4) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen (3) + Sort [rank_col] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + Filter [rank_col] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [ss_store_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #3 + WholeStageCodegen (1) + HashAggregate [ss_store_sk,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,sum,count] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_item_sk,ss_net_profit] [sum,count,sum,count] + Project [ss_item_sk,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (8) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen (7) + Sort [rank_col] + InputAdapter + ReusedExchange [item_sk,rank_col] #1 + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (9) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4eda34a7e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/explain.txt @@ -0,0 +1,226 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * BroadcastHashJoin Inner BuildRight (30) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * SortMergeJoin Inner (21) + : : :- * Sort (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- Window (11) + : : : +- * Sort (10) + : : : +- Exchange (9) + : : : +- * Filter (8) + : : : +- * HashAggregate (7) + : : : +- Exchange (6) + : : : +- * HashAggregate (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- * Sort (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- Window (17) + : : +- * Sort (16) + : : +- ReusedExchange (15) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- ReusedExchange (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(4) Project [codegen id : 1] +Output [2]: [ss_item_sk#1, ss_net_profit#3] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3, ss_sold_date_sk#4] + +(5) HashAggregate [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#5, count#6] +Results [3]: [ss_item_sk#1, sum#7, count#8] + +(6) Exchange +Input [3]: [ss_item_sk#1, sum#7, count#8] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(7) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#1, sum#7, count#8] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [2]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11] + +(8) Filter [codegen id : 2] +Input [2]: [item_sk#10, rank_col#11] +Condition : (isnotnull(rank_col#11) AND (cast(rank_col#11 as decimal(13,7)) > (0.9 * Subquery scalar-subquery#12, [id=#13]))) + +(9) Exchange +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(10) Sort [codegen id : 3] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(11) Window +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#14], [rank_col#11 ASC NULLS FIRST] + +(12) Filter [codegen id : 4] +Input [3]: [item_sk#10, rank_col#11, rnk#14] +Condition : ((rnk#14 < 11) AND isnotnull(item_sk#10)) + +(13) Project [codegen id : 4] +Output [2]: [item_sk#10, rnk#14] +Input [3]: [item_sk#10, rank_col#11, rnk#14] + +(14) Sort [codegen id : 4] +Input [2]: [item_sk#10, rnk#14] +Arguments: [rnk#14 ASC NULLS FIRST], false, 0 + +(15) ReusedExchange [Reuses operator id: 9] +Output [2]: [item_sk#15, rank_col#16] + +(16) Sort [codegen id : 7] +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank_col#16 DESC NULLS LAST], false, 0 + +(17) Window +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank(rank_col#16) windowspecdefinition(rank_col#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#17], [rank_col#16 DESC NULLS LAST] + +(18) Filter [codegen id : 8] +Input [3]: [item_sk#15, rank_col#16, rnk#17] +Condition : ((rnk#17 < 11) AND isnotnull(item_sk#15)) + +(19) Project [codegen id : 8] +Output [2]: [item_sk#15, rnk#17] +Input [3]: [item_sk#15, rank_col#16, rnk#17] + +(20) Sort [codegen id : 8] +Input [2]: [item_sk#15, rnk#17] +Arguments: [rnk#17 ASC NULLS FIRST], false, 0 + +(21) SortMergeJoin [codegen id : 11] +Left keys [1]: [rnk#14] +Right keys [1]: [rnk#17] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 11] +Output [3]: [item_sk#10, rnk#14, item_sk#15] +Input [4]: [item_sk#10, rnk#14, item_sk#15, rnk#17] + +(23) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#18, i_product_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 9] +Input [2]: [i_item_sk#18, i_product_name#19] + +(25) Filter [codegen id : 9] +Input [2]: [i_item_sk#18, i_product_name#19] +Condition : isnotnull(i_item_sk#18) + +(26) BroadcastExchange +Input [2]: [i_item_sk#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(27) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#18] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 11] +Output [3]: [rnk#14, item_sk#15, i_product_name#19] +Input [5]: [item_sk#10, rnk#14, item_sk#15, i_item_sk#18, i_product_name#19] + +(29) ReusedExchange [Reuses operator id: 26] +Output [2]: [i_item_sk#20, i_product_name#21] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [item_sk#15] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 11] +Output [3]: [rnk#14, i_product_name#19 AS best_performing#22, i_product_name#21 AS worst_performing#23] +Input [5]: [rnk#14, item_sk#15, i_product_name#19, i_item_sk#20, i_product_name#21] + +(32) TakeOrderedAndProject +Input [3]: [rnk#14, best_performing#22, worst_performing#23] +Arguments: 100, [rnk#14 ASC NULLS FIRST], [rnk#14, best_performing#22, worst_performing#23] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#12, [id=#13] +* HashAggregate (39) ++- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * ColumnarToRow (34) + +- Scan parquet spark_catalog.default.store_sales (33) + + +(33) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 1] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] + +(35) Filter [codegen id : 1] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] +Condition : ((isnotnull(ss_store_sk#25) AND (ss_store_sk#25 = 4)) AND isnull(ss_addr_sk#24)) + +(36) Project [codegen id : 1] +Output [2]: [ss_store_sk#25, ss_net_profit#26] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] + +(37) HashAggregate [codegen id : 1] +Input [2]: [ss_store_sk#25, ss_net_profit#26] +Keys [1]: [ss_store_sk#25] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [2]: [sum#28, count#29] +Results [3]: [ss_store_sk#25, sum#30, count#31] + +(38) Exchange +Input [3]: [ss_store_sk#25, sum#30, count#31] +Arguments: hashpartitioning(ss_store_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(39) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#25, sum#30, count#31] +Keys [1]: [ss_store_sk#25] +Functions [1]: [avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#26))#32] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#26))#32 / 100.0) as decimal(11,6)) AS rank_col#33] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..46d14b650b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen (11) + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + SortMergeJoin [rnk,rnk] + InputAdapter + WholeStageCodegen (4) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen (3) + Sort [rank_col] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + Filter [rank_col] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [ss_store_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #3 + WholeStageCodegen (1) + HashAggregate [ss_store_sk,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,sum,count] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen (1) + HashAggregate [ss_item_sk,ss_net_profit] [sum,count,sum,count] + Project [ss_item_sk,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (8) + Sort [rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen (7) + Sort [rank_col] + InputAdapter + ReusedExchange [item_sk,rank_col] #1 + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (9) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/explain.txt new file mode 100644 index 0000000000..0350ba69bd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (34) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.customer (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.customer_address (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.date_dim (16) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.item (29) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#5)] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] + +(3) Filter [codegen id : 6] +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Condition : (isnotnull(ws_bill_customer_sk#3) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_bill_customer_sk#3] +Right keys [1]: [c_customer_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] +Input [6]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5, c_customer_sk#6, c_current_addr_sk#7] + +(10) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Condition : isnotnull(ca_address_sk#8) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#7] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 6] +Output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] +Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7, ca_address_sk#8, ca_city#9, ca_zip#10] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#11] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(20) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#5] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 6] +Output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] +Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10, d_date_sk#11] + +(23) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#14, i_item_id#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#14, i_item_id#15] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#14, i_item_id#15] +Condition : isnotnull(i_item_sk#14) + +(26) BroadcastExchange +Input [2]: [i_item_sk#14, i_item_id#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [4]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] +Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10, i_item_sk#14, i_item_id#15] + +(29) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#16, i_item_id#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_item_sk, [11,13,17,19,2,23,29,3,5,7])] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [i_item_sk#16, i_item_id#17] + +(31) Filter [codegen id : 5] +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : i_item_sk#16 IN (2,3,5,7,11,13,17,19,23,29) + +(32) Project [codegen id : 5] +Output [1]: [i_item_id#17] +Input [2]: [i_item_sk#16, i_item_id#17] + +(33) BroadcastExchange +Input [1]: [i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_id#15] +Right keys [1]: [i_item_id#17] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(35) Filter [codegen id : 6] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] +Condition : (substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(36) Project [codegen id : 6] +Output [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ca_zip#10, ca_city#9, sum#19] + +(38) Exchange +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Arguments: hashpartitioning(ca_zip#10, ca_city#9, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(39) HashAggregate [codegen id : 7] +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#20] +Results [3]: [ca_zip#10, ca_city#9, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#20,17,2) AS sum(ws_sales_price)#21] + +(40) TakeOrderedAndProject +Input [3]: [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] +Arguments: 100, [ca_zip#10 ASC NULLS FIRST, ca_city#9 ASC NULLS FIRST], [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a36bd2c3ca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_datafusion/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen (7) + HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen (6) + HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + Project [ws_sales_price,ca_city,ca_zip,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Filter [ws_bill_customer_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0350ba69bd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (34) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.customer (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.customer_address (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.date_dim (16) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.item (29) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#5)] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] + +(3) Filter [codegen id : 6] +Input [4]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5] +Condition : (isnotnull(ws_bill_customer_sk#3) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_bill_customer_sk#3] +Right keys [1]: [c_customer_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7] +Input [6]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5, c_customer_sk#6, c_current_addr_sk#7] + +(10) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Condition : isnotnull(ca_address_sk#8) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#8, ca_city#9, ca_zip#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#7] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 6] +Output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10] +Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#7, ca_address_sk#8, ca_city#9, ca_zip#10] + +(16) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#11] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(20) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#5] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 6] +Output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10] +Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#9, ca_zip#10, d_date_sk#11] + +(23) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#14, i_item_id#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#14, i_item_id#15] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#14, i_item_id#15] +Condition : isnotnull(i_item_sk#14) + +(26) BroadcastExchange +Input [2]: [i_item_sk#14, i_item_id#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#14] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [4]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15] +Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#9, ca_zip#10, i_item_sk#14, i_item_id#15] + +(29) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#16, i_item_id#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_item_sk, [11,13,17,19,2,23,29,3,5,7])] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [i_item_sk#16, i_item_id#17] + +(31) Filter [codegen id : 5] +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : i_item_sk#16 IN (2,3,5,7,11,13,17,19,23,29) + +(32) Project [codegen id : 5] +Output [1]: [i_item_id#17] +Input [2]: [i_item_sk#16, i_item_id#17] + +(33) BroadcastExchange +Input [1]: [i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_id#15] +Right keys [1]: [i_item_id#17] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(35) Filter [codegen id : 6] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] +Condition : (substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(36) Project [codegen id : 6] +Output [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Input [5]: [ws_sales_price#4, ca_city#9, ca_zip#10, i_item_id#15, exists#1] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ws_sales_price#4, ca_city#9, ca_zip#10] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ca_zip#10, ca_city#9, sum#19] + +(38) Exchange +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Arguments: hashpartitioning(ca_zip#10, ca_city#9, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(39) HashAggregate [codegen id : 7] +Input [3]: [ca_zip#10, ca_city#9, sum#19] +Keys [2]: [ca_zip#10, ca_city#9] +Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#20] +Results [3]: [ca_zip#10, ca_city#9, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#20,17,2) AS sum(ws_sales_price)#21] + +(40) TakeOrderedAndProject +Input [3]: [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] +Arguments: 100, [ca_zip#10 ASC NULLS FIRST, ca_city#9 ASC NULLS FIRST], [ca_zip#10, ca_city#9, sum(ws_sales_price)#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a36bd2c3ca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.native_iceberg_compat/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen (7) + HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen (6) + HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + Project [ws_sales_price,ca_city,ca_zip,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_sales_price,ws_sold_date_sk,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_sales_price,ws_sold_date_sk,c_current_addr_sk] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Filter [ws_bill_customer_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/explain.txt new file mode 100644 index 0000000000..245d1a3d5b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet spark_catalog.default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet spark_catalog.default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet spark_catalog.default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet spark_catalog.default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_city#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_city#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_city#13] +Condition : (s_city#13 IN (Fairview,Midway) AND isnotnull(s_store_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#12] +Input [2]: [s_store_sk#12, s_city#13] + +(15) BroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#14] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] + +(25) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_city#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_city#18] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#17, ca_city#18] + +(31) HashAggregate [codegen id : 5] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum#19, sum#20] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] + +(32) Exchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 8] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#23, sum(UnscaledValue(ss_net_profit#7))#24] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#18 AS bought_city#25, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#23,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#24,17,2) AS profit#27] + +(34) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Condition : (isnotnull(c_customer_sk#28) AND isnotnull(c_current_addr_sk#29)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#28] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [7]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27, c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#32, ca_city#33] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#29] +Right keys [1]: [ca_address_sk#32] +Join type: Inner +Join condition: NOT (ca_city#33 = bought_city#25) + +(42) Project [codegen id : 8] +Output [7]: [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Input [9]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#29, c_first_name#30, c_last_name#31, ca_address_sk#32, ca_city#33] + +(43) TakeOrderedAndProject +Input [7]: [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Arguments: 100, [c_last_name#31 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, ca_city#33 ASC NULLS FIRST, bought_city#25 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a6a56fe693 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + WholeStageCodegen (8) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),bought_city,amt,profit,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (5) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..245d1a3d5b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet spark_catalog.default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet spark_catalog.default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet spark_catalog.default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet spark_catalog.default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_city#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_city#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_city#13] +Condition : (s_city#13 IN (Fairview,Midway) AND isnotnull(s_store_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#12] +Input [2]: [s_store_sk#12, s_city#13] + +(15) BroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#14] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] + +(25) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_city#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_city#18] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#17, ca_city#18] + +(31) HashAggregate [codegen id : 5] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum#19, sum#20] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] + +(32) Exchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 8] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#21, sum#22] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#23, sum(UnscaledValue(ss_net_profit#7))#24] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#18 AS bought_city#25, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#23,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#24,17,2) AS profit#27] + +(34) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Condition : (isnotnull(c_customer_sk#28) AND isnotnull(c_current_addr_sk#29)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#28] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [7]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#29, c_first_name#30, c_last_name#31] +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#25, amt#26, profit#27, c_customer_sk#28, c_current_addr_sk#29, c_first_name#30, c_last_name#31] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#32, ca_city#33] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#29] +Right keys [1]: [ca_address_sk#32] +Join type: Inner +Join condition: NOT (ca_city#33 = bought_city#25) + +(42) Project [codegen id : 8] +Output [7]: [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Input [9]: [ss_ticket_number#5, bought_city#25, amt#26, profit#27, c_current_addr_sk#29, c_first_name#30, c_last_name#31, ca_address_sk#32, ca_city#33] + +(43) TakeOrderedAndProject +Input [7]: [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] +Arguments: 100, [c_last_name#31 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, ca_city#33 ASC NULLS FIRST, bought_city#25 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#31, c_first_name#30, ca_city#33, bought_city#25, ss_ticket_number#5, amt#26, profit#27] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a6a56fe693 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + WholeStageCodegen (8) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),bought_city,amt,profit,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (5) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/explain.txt new file mode 100644 index 0000000000..fcd2b4779d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.store (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(7) BroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#14] +Results [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#16] +Results [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS _w0#18] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(30) Filter [codegen id : 22] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN ((abs((sum_sales#17 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] + +(32) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] + +(33) HashAggregate [codegen id : 12] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] +Keys [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26] +Functions [1]: [sum(UnscaledValue(ss_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#28))#16] +Results [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#28))#16,17,2) AS sum_sales#17] + +(34) Exchange +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, s_store_name#23 ASC NULLS FIRST, s_company_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST], false, 0 + +(36) Window +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [rank(d_year#25, d_moy#26) windowspecdefinition(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#29], [i_category#21, i_brand#22, s_store_name#23, s_company_name#24], [d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#17 AS sum_sales#30, rn#29] +Input [8]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17, rn#29] + +(38) BroadcastExchange +Input [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, (rn#29 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] + +(41) ReusedExchange [Reuses operator id: 34] +Output [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] + +(42) Sort [codegen id : 20] +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [i_category#31 ASC NULLS FIRST, i_brand#32 ASC NULLS FIRST, s_store_name#33 ASC NULLS FIRST, s_company_name#34 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +(43) Window +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#31, i_brand#32, s_store_name#33, s_company_name#34], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#17 AS sum_sales#38, rn#37] +Input [8]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17, rn#37] + +(45) BroadcastExchange +Input [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, (rn#37 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, sum_sales#30 AS psum#39, sum_sales#38 AS nsum#40] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30, i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] + +(48) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] +Arguments: 100, [(sum_sales#17 - avg_monthly_sales#20) ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/simplified.txt new file mode 100644 index 0000000000..20a2eeef3a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_datafusion/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] + WholeStageCodegen (22) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..fcd2b4779d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.store (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(7) BroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#14] +Results [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#16] +Results [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS _w0#18] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(30) Filter [codegen id : 22] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN ((abs((sum_sales#17 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] + +(32) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] + +(33) HashAggregate [codegen id : 12] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] +Keys [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26] +Functions [1]: [sum(UnscaledValue(ss_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#28))#16] +Results [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#28))#16,17,2) AS sum_sales#17] + +(34) Exchange +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, s_store_name#23 ASC NULLS FIRST, s_company_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST], false, 0 + +(36) Window +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [rank(d_year#25, d_moy#26) windowspecdefinition(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#29], [i_category#21, i_brand#22, s_store_name#23, s_company_name#24], [d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#17 AS sum_sales#30, rn#29] +Input [8]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17, rn#29] + +(38) BroadcastExchange +Input [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, (rn#29 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] + +(41) ReusedExchange [Reuses operator id: 34] +Output [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] + +(42) Sort [codegen id : 20] +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [i_category#31 ASC NULLS FIRST, i_brand#32 ASC NULLS FIRST, s_store_name#33 ASC NULLS FIRST, s_company_name#34 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +(43) Window +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#31, i_brand#32, s_store_name#33, s_company_name#34], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#17 AS sum_sales#38, rn#37] +Input [8]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17, rn#37] + +(45) BroadcastExchange +Input [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, (rn#37 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, sum_sales#30 AS psum#39, sum_sales#38 AS nsum#40] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30, i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] + +(48) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] +Arguments: 100, [(sum_sales#17 - avg_monthly_sales#20) ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..20a2eeef3a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.native_iceberg_compat/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] + WholeStageCodegen (22) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/explain.txt new file mode 100644 index 0000000000..c0cb413c85 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/explain.txt @@ -0,0 +1,188 @@ +== Physical Plan == +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.store (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.customer_demographics (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.customer_address (16) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet spark_catalog.default.date_dim (23) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 5] +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((((isnotnull(ss_store_sk#3) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_addr_sk#2)) AND ((((ss_sales_price#5 >= 100.00) AND (ss_sales_price#5 <= 150.00)) OR ((ss_sales_price#5 >= 50.00) AND (ss_sales_price#5 <= 100.00))) OR ((ss_sales_price#5 >= 150.00) AND (ss_sales_price#5 <= 200.00)))) AND ((((ss_net_profit#6 >= 0.00) AND (ss_net_profit#6 <= 2000.00)) OR ((ss_net_profit#6 >= 150.00) AND (ss_net_profit#6 <= 3000.00))) OR ((ss_net_profit#6 >= 50.00) AND (ss_net_profit#6 <= 25000.00)))) + +(4) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#8] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(7) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#8] + +(10) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree )),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree ))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College )))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(12) Filter [codegen id : 2] +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Condition : (isnotnull(cd_demo_sk#9) AND ((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) OR ((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree ))) OR ((cd_marital_status#10 = S) AND (cd_education_status#11 = College )))) + +(13) BroadcastExchange +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#1] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: ((((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#10 = S) AND (cd_education_status#11 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))) + +(15) Project [codegen id : 5] +Output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(16) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [KY,MN,OR])),In(ca_state, [CA,MS,VA]))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(18) Filter [codegen id : 3] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (CO,OH,TX) OR ca_state#13 IN (OR,MN,KY)) OR ca_state#13 IN (VA,CA,MS))) + +(19) Project [codegen id : 3] +Output [2]: [ca_address_sk#12, ca_state#13] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(20) BroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#12] +Join type: Inner +Join condition: ((((ca_state#13 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#13 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#13 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))) + +(22) Project [codegen id : 5] +Output [2]: [ss_quantity#4, ss_sold_date_sk#7] +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#12, ca_state#13] + +(23) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#15, d_year#16] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#16] + +(27) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [ss_quantity#4] +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#15] + +(30) HashAggregate [codegen id : 5] +Input [1]: [ss_quantity#4] +Keys: [] +Functions [1]: [partial_sum(ss_quantity#4)] +Aggregate Attributes [1]: [sum#17] +Results [1]: [sum#18] + +(31) Exchange +Input [1]: [sum#18] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [1]: [sum#18] +Keys: [] +Functions [1]: [sum(ss_quantity#4)] +Aggregate Attributes [1]: [sum(ss_quantity#4)#19] +Results [1]: [sum(ss_quantity#4)#19 AS sum(ss_quantity)#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2d1dbeadd4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (6) + HashAggregate [sum] [sum(ss_quantity),sum(ss_quantity),sum] + InputAdapter + Exchange #1 + WholeStageCodegen (5) + HashAggregate [ss_quantity] [sum,sum] + Project [ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sales_price,ss_net_profit] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..c0cb413c85 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/explain.txt @@ -0,0 +1,188 @@ +== Physical Plan == +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.store (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.customer_demographics (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.customer_address (16) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet spark_catalog.default.date_dim (23) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 5] +Input [7]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((((isnotnull(ss_store_sk#3) AND isnotnull(ss_cdemo_sk#1)) AND isnotnull(ss_addr_sk#2)) AND ((((ss_sales_price#5 >= 100.00) AND (ss_sales_price#5 <= 150.00)) OR ((ss_sales_price#5 >= 50.00) AND (ss_sales_price#5 <= 100.00))) OR ((ss_sales_price#5 >= 150.00) AND (ss_sales_price#5 <= 200.00)))) AND ((((ss_net_profit#6 >= 0.00) AND (ss_net_profit#6 <= 2000.00)) OR ((ss_net_profit#6 >= 150.00) AND (ss_net_profit#6 <= 3000.00))) OR ((ss_net_profit#6 >= 50.00) AND (ss_net_profit#6 <= 25000.00)))) + +(4) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#8] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(7) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#8] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#8] + +(10) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree )),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree ))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College )))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(12) Filter [codegen id : 2] +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Condition : (isnotnull(cd_demo_sk#9) AND ((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) OR ((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree ))) OR ((cd_marital_status#10 = S) AND (cd_education_status#11 = College )))) + +(13) BroadcastExchange +Input [3]: [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#1] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: ((((((cd_marital_status#10 = M) AND (cd_education_status#11 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#10 = D) AND (cd_education_status#11 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#10 = S) AND (cd_education_status#11 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))) + +(15) Project [codegen id : 5] +Output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + +(16) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [KY,MN,OR])),In(ca_state, [CA,MS,VA]))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(18) Filter [codegen id : 3] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] +Condition : (((isnotnull(ca_country#14) AND (ca_country#14 = United States)) AND isnotnull(ca_address_sk#12)) AND ((ca_state#13 IN (CO,OH,TX) OR ca_state#13 IN (OR,MN,KY)) OR ca_state#13 IN (VA,CA,MS))) + +(19) Project [codegen id : 3] +Output [2]: [ca_address_sk#12, ca_state#13] +Input [3]: [ca_address_sk#12, ca_state#13, ca_country#14] + +(20) BroadcastExchange +Input [2]: [ca_address_sk#12, ca_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#12] +Join type: Inner +Join condition: ((((ca_state#13 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#13 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#13 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))) + +(22) Project [codegen id : 5] +Output [2]: [ss_quantity#4, ss_sold_date_sk#7] +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#12, ca_state#13] + +(23) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_year#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#15, d_year#16] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#15, d_year#16] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#16] + +(27) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [ss_quantity#4] +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#15] + +(30) HashAggregate [codegen id : 5] +Input [1]: [ss_quantity#4] +Keys: [] +Functions [1]: [partial_sum(ss_quantity#4)] +Aggregate Attributes [1]: [sum#17] +Results [1]: [sum#18] + +(31) Exchange +Input [1]: [sum#18] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [1]: [sum#18] +Keys: [] +Functions [1]: [sum(ss_quantity#4)] +Aggregate Attributes [1]: [sum(ss_quantity#4)#19] +Results [1]: [sum(ss_quantity#4)#19 AS sum(ss_quantity)#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2d1dbeadd4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (6) + HashAggregate [sum] [sum(ss_quantity),sum(ss_quantity),sum] + InputAdapter + Exchange #1 + WholeStageCodegen (5) + HashAggregate [ss_quantity] [sum,sum] + Project [ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_addr_sk,ss_quantity,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sales_price,ss_net_profit] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/explain.txt new file mode 100644 index 0000000000..b024c6eed2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/explain.txt @@ -0,0 +1,457 @@ +== Physical Plan == +TakeOrderedAndProject (81) ++- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- Union (77) + :- * Project (28) + : +- * Filter (27) + : +- Window (26) + : +- * Sort (25) + : +- Window (24) + : +- * Sort (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildLeft (10) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- * Project (9) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet spark_catalog.default.web_returns (6) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + :- * Project (52) + : +- * Filter (51) + : +- Window (50) + : +- * Sort (49) + : +- Window (48) + : +- * Sort (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildLeft (38) + : : :- BroadcastExchange (33) + : : : +- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.catalog_sales (29) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.catalog_returns (34) + : +- ReusedExchange (40) + +- * Project (76) + +- * Filter (75) + +- Window (74) + +- * Sort (73) + +- Window (72) + +- * Sort (71) + +- Exchange (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildLeft (62) + : :- BroadcastExchange (57) + : : +- * Project (56) + : : +- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet spark_catalog.default.store_sales (53) + : +- * Project (61) + : +- * Filter (60) + : +- * ColumnarToRow (59) + : +- Scan parquet spark_catalog.default.store_returns (58) + +- ReusedExchange (64) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#6)] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(4) Project [codegen id : 1] +Output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(5) BroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=1] + +(6) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(8) Filter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(9) Project +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [ws_order_number#2, ws_item_sk#1] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(12) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(14) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(16) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(19) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] + +(20) Exchange +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#9, 0))#27, sum(coalesce(ws_quantity#3, 0))#28, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30] +Results [3]: [ws_item_sk#1 AS item#31, (cast(sum(coalesce(wr_return_quantity#9, 0))#27 as decimal(15,4)) / cast(sum(coalesce(ws_quantity#3, 0))#28 as decimal(15,4))) AS return_ratio#32, (cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29 as decimal(15,4)) / cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30 as decimal(15,4))) AS currency_ratio#33] + +(22) Exchange +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(23) Sort [codegen id : 5] +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [return_ratio#32 ASC NULLS FIRST], false, 0 + +(24) Window +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [rank(return_ratio#32) windowspecdefinition(return_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#34], [return_ratio#32 ASC NULLS FIRST] + +(25) Sort [codegen id : 6] +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [currency_ratio#33 ASC NULLS FIRST], false, 0 + +(26) Window +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [rank(currency_ratio#33) windowspecdefinition(currency_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#35], [currency_ratio#33 ASC NULLS FIRST] + +(27) Filter [codegen id : 7] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] +Condition : ((return_rank#34 <= 10) OR (currency_rank#35 <= 10)) + +(28) Project [codegen id : 7] +Output [5]: [web AS channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] + +(29) Scan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#42)] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(31) Filter [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Condition : (((((((isnotnull(cs_net_profit#41) AND isnotnull(cs_net_paid#40)) AND isnotnull(cs_quantity#39)) AND (cs_net_profit#41 > 1.00)) AND (cs_net_paid#40 > 0.00)) AND (cs_quantity#39 > 0)) AND isnotnull(cs_order_number#38)) AND isnotnull(cs_item_sk#37)) + +(32) Project [codegen id : 8] +Output [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(33) BroadcastExchange +Input [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=5] + +(34) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(36) Filter +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Condition : (((isnotnull(cr_return_amount#46) AND (cr_return_amount#46 > 10000.00)) AND isnotnull(cr_order_number#44)) AND isnotnull(cr_item_sk#43)) + +(37) Project +Output [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#38, cs_item_sk#37] +Right keys [2]: [cr_order_number#44, cr_item_sk#43] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [6]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46] +Input [9]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] + +(40) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#48] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#42] +Right keys [1]: [d_date_sk#48] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Input [7]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46, d_date_sk#48] + +(43) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Keys [1]: [cs_item_sk#37] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#45, 0)), partial_sum(coalesce(cs_quantity#39, 0)), partial_sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#49, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Results [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] + +(44) Exchange +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: hashpartitioning(cs_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Keys [1]: [cs_item_sk#37] +Functions [4]: [sum(coalesce(cr_return_quantity#45, 0)), sum(coalesce(cs_quantity#39, 0)), sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#45, 0))#61, sum(coalesce(cs_quantity#39, 0))#62, sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63, sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64] +Results [3]: [cs_item_sk#37 AS item#65, (cast(sum(coalesce(cr_return_quantity#45, 0))#61 as decimal(15,4)) / cast(sum(coalesce(cs_quantity#39, 0))#62 as decimal(15,4))) AS return_ratio#66, (cast(sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63 as decimal(15,4)) / cast(sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64 as decimal(15,4))) AS currency_ratio#67] + +(46) Exchange +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(47) Sort [codegen id : 12] +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [return_ratio#66 ASC NULLS FIRST], false, 0 + +(48) Window +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [rank(return_ratio#66) windowspecdefinition(return_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#68], [return_ratio#66 ASC NULLS FIRST] + +(49) Sort [codegen id : 13] +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [currency_ratio#67 ASC NULLS FIRST], false, 0 + +(50) Window +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [rank(currency_ratio#67) windowspecdefinition(currency_ratio#67 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#69], [currency_ratio#67 ASC NULLS FIRST] + +(51) Filter [codegen id : 14] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] +Condition : ((return_rank#68 <= 10) OR (currency_rank#69 <= 10)) + +(52) Project [codegen id : 14] +Output [5]: [catalog AS channel#70, item#65, return_ratio#66, return_rank#68, currency_rank#69] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] + +(53) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#76)] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(55) Filter [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Condition : (((((((isnotnull(ss_net_profit#75) AND isnotnull(ss_net_paid#74)) AND isnotnull(ss_quantity#73)) AND (ss_net_profit#75 > 1.00)) AND (ss_net_paid#74 > 0.00)) AND (ss_quantity#73 > 0)) AND isnotnull(ss_ticket_number#72)) AND isnotnull(ss_item_sk#71)) + +(56) Project [codegen id : 15] +Output [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(57) BroadcastExchange +Input [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=8] + +(58) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(60) Filter +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Condition : (((isnotnull(sr_return_amt#80) AND (sr_return_amt#80 > 10000.00)) AND isnotnull(sr_ticket_number#78)) AND isnotnull(sr_item_sk#77)) + +(61) Project +Output [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [ss_ticket_number#72, ss_item_sk#71] +Right keys [2]: [sr_ticket_number#78, sr_item_sk#77] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80] +Input [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] + +(64) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#82] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#76] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Input [7]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80, d_date_sk#82] + +(67) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Keys [1]: [ss_item_sk#71] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#79, 0)), partial_sum(coalesce(ss_quantity#73, 0)), partial_sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#83, sum#84, sum#85, isEmpty#86, sum#87, isEmpty#88] +Results [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] + +(68) Exchange +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Arguments: hashpartitioning(ss_item_sk#71, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(69) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Keys [1]: [ss_item_sk#71] +Functions [4]: [sum(coalesce(sr_return_quantity#79, 0)), sum(coalesce(ss_quantity#73, 0)), sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#79, 0))#95, sum(coalesce(ss_quantity#73, 0))#96, sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97, sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98] +Results [3]: [ss_item_sk#71 AS item#99, (cast(sum(coalesce(sr_return_quantity#79, 0))#95 as decimal(15,4)) / cast(sum(coalesce(ss_quantity#73, 0))#96 as decimal(15,4))) AS return_ratio#100, (cast(sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97 as decimal(15,4)) / cast(sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98 as decimal(15,4))) AS currency_ratio#101] + +(70) Exchange +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(71) Sort [codegen id : 19] +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [return_ratio#100 ASC NULLS FIRST], false, 0 + +(72) Window +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [rank(return_ratio#100) windowspecdefinition(return_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#102], [return_ratio#100 ASC NULLS FIRST] + +(73) Sort [codegen id : 20] +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [currency_ratio#101 ASC NULLS FIRST], false, 0 + +(74) Window +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [rank(currency_ratio#101) windowspecdefinition(currency_ratio#101 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#103], [currency_ratio#101 ASC NULLS FIRST] + +(75) Filter [codegen id : 21] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] +Condition : ((return_rank#102 <= 10) OR (currency_rank#103 <= 10)) + +(76) Project [codegen id : 21] +Output [5]: [store AS channel#104, item#99, return_ratio#100, return_rank#102, currency_rank#103] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] + +(77) Union + +(78) HashAggregate [codegen id : 22] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(79) Exchange +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: hashpartitioning(channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 23] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(81) TakeOrderedAndProject +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: 100, [channel#36 ASC NULLS FIRST, return_rank#34 ASC NULLS FIRST, currency_rank#35 ASC NULLS FIRST], [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/simplified.txt new file mode 100644 index 0000000000..018748b274 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_datafusion/simplified.txt @@ -0,0 +1,129 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (22) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (7) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + Filter [ws_net_profit,ws_net_paid,ws_quantity,ws_order_number,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_return_amt,wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (14) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(cr_return_quantity, 0)),sum(coalesce(cs_quantity, 0)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate [cs_item_sk,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_order_number,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(sr_return_quantity, 0)),sum(coalesce(ss_quantity, 0)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [ss_item_sk,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] + Filter [ss_net_profit,ss_net_paid,ss_quantity,ss_ticket_number,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b024c6eed2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/explain.txt @@ -0,0 +1,457 @@ +== Physical Plan == +TakeOrderedAndProject (81) ++- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- Union (77) + :- * Project (28) + : +- * Filter (27) + : +- Window (26) + : +- * Sort (25) + : +- Window (24) + : +- * Sort (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildLeft (10) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- * Project (9) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet spark_catalog.default.web_returns (6) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + :- * Project (52) + : +- * Filter (51) + : +- Window (50) + : +- * Sort (49) + : +- Window (48) + : +- * Sort (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildLeft (38) + : : :- BroadcastExchange (33) + : : : +- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.catalog_sales (29) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.catalog_returns (34) + : +- ReusedExchange (40) + +- * Project (76) + +- * Filter (75) + +- Window (74) + +- * Sort (73) + +- Window (72) + +- * Sort (71) + +- Exchange (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildLeft (62) + : :- BroadcastExchange (57) + : : +- * Project (56) + : : +- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet spark_catalog.default.store_sales (53) + : +- * Project (61) + : +- * Filter (60) + : +- * ColumnarToRow (59) + : +- Scan parquet spark_catalog.default.store_returns (58) + +- ReusedExchange (64) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#6)] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(4) Project [codegen id : 1] +Output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(5) BroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=1] + +(6) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(8) Filter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(9) Project +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [ws_order_number#2, ws_item_sk#1] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(12) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(14) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(16) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(19) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] + +(20) Exchange +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#9, 0))#27, sum(coalesce(ws_quantity#3, 0))#28, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30] +Results [3]: [ws_item_sk#1 AS item#31, (cast(sum(coalesce(wr_return_quantity#9, 0))#27 as decimal(15,4)) / cast(sum(coalesce(ws_quantity#3, 0))#28 as decimal(15,4))) AS return_ratio#32, (cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29 as decimal(15,4)) / cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30 as decimal(15,4))) AS currency_ratio#33] + +(22) Exchange +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(23) Sort [codegen id : 5] +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [return_ratio#32 ASC NULLS FIRST], false, 0 + +(24) Window +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [rank(return_ratio#32) windowspecdefinition(return_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#34], [return_ratio#32 ASC NULLS FIRST] + +(25) Sort [codegen id : 6] +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [currency_ratio#33 ASC NULLS FIRST], false, 0 + +(26) Window +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [rank(currency_ratio#33) windowspecdefinition(currency_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#35], [currency_ratio#33 ASC NULLS FIRST] + +(27) Filter [codegen id : 7] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] +Condition : ((return_rank#34 <= 10) OR (currency_rank#35 <= 10)) + +(28) Project [codegen id : 7] +Output [5]: [web AS channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] + +(29) Scan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#42)] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(31) Filter [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Condition : (((((((isnotnull(cs_net_profit#41) AND isnotnull(cs_net_paid#40)) AND isnotnull(cs_quantity#39)) AND (cs_net_profit#41 > 1.00)) AND (cs_net_paid#40 > 0.00)) AND (cs_quantity#39 > 0)) AND isnotnull(cs_order_number#38)) AND isnotnull(cs_item_sk#37)) + +(32) Project [codegen id : 8] +Output [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(33) BroadcastExchange +Input [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=5] + +(34) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(36) Filter +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Condition : (((isnotnull(cr_return_amount#46) AND (cr_return_amount#46 > 10000.00)) AND isnotnull(cr_order_number#44)) AND isnotnull(cr_item_sk#43)) + +(37) Project +Output [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#38, cs_item_sk#37] +Right keys [2]: [cr_order_number#44, cr_item_sk#43] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [6]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46] +Input [9]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] + +(40) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#48] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#42] +Right keys [1]: [d_date_sk#48] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Input [7]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46, d_date_sk#48] + +(43) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Keys [1]: [cs_item_sk#37] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#45, 0)), partial_sum(coalesce(cs_quantity#39, 0)), partial_sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#49, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Results [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] + +(44) Exchange +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: hashpartitioning(cs_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Keys [1]: [cs_item_sk#37] +Functions [4]: [sum(coalesce(cr_return_quantity#45, 0)), sum(coalesce(cs_quantity#39, 0)), sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#45, 0))#61, sum(coalesce(cs_quantity#39, 0))#62, sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63, sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64] +Results [3]: [cs_item_sk#37 AS item#65, (cast(sum(coalesce(cr_return_quantity#45, 0))#61 as decimal(15,4)) / cast(sum(coalesce(cs_quantity#39, 0))#62 as decimal(15,4))) AS return_ratio#66, (cast(sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63 as decimal(15,4)) / cast(sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64 as decimal(15,4))) AS currency_ratio#67] + +(46) Exchange +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(47) Sort [codegen id : 12] +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [return_ratio#66 ASC NULLS FIRST], false, 0 + +(48) Window +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [rank(return_ratio#66) windowspecdefinition(return_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#68], [return_ratio#66 ASC NULLS FIRST] + +(49) Sort [codegen id : 13] +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [currency_ratio#67 ASC NULLS FIRST], false, 0 + +(50) Window +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [rank(currency_ratio#67) windowspecdefinition(currency_ratio#67 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#69], [currency_ratio#67 ASC NULLS FIRST] + +(51) Filter [codegen id : 14] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] +Condition : ((return_rank#68 <= 10) OR (currency_rank#69 <= 10)) + +(52) Project [codegen id : 14] +Output [5]: [catalog AS channel#70, item#65, return_ratio#66, return_rank#68, currency_rank#69] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] + +(53) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#76)] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(55) Filter [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Condition : (((((((isnotnull(ss_net_profit#75) AND isnotnull(ss_net_paid#74)) AND isnotnull(ss_quantity#73)) AND (ss_net_profit#75 > 1.00)) AND (ss_net_paid#74 > 0.00)) AND (ss_quantity#73 > 0)) AND isnotnull(ss_ticket_number#72)) AND isnotnull(ss_item_sk#71)) + +(56) Project [codegen id : 15] +Output [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(57) BroadcastExchange +Input [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=8] + +(58) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(60) Filter +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Condition : (((isnotnull(sr_return_amt#80) AND (sr_return_amt#80 > 10000.00)) AND isnotnull(sr_ticket_number#78)) AND isnotnull(sr_item_sk#77)) + +(61) Project +Output [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [ss_ticket_number#72, ss_item_sk#71] +Right keys [2]: [sr_ticket_number#78, sr_item_sk#77] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80] +Input [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] + +(64) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#82] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#76] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Input [7]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80, d_date_sk#82] + +(67) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Keys [1]: [ss_item_sk#71] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#79, 0)), partial_sum(coalesce(ss_quantity#73, 0)), partial_sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#83, sum#84, sum#85, isEmpty#86, sum#87, isEmpty#88] +Results [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] + +(68) Exchange +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Arguments: hashpartitioning(ss_item_sk#71, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(69) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Keys [1]: [ss_item_sk#71] +Functions [4]: [sum(coalesce(sr_return_quantity#79, 0)), sum(coalesce(ss_quantity#73, 0)), sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#79, 0))#95, sum(coalesce(ss_quantity#73, 0))#96, sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97, sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98] +Results [3]: [ss_item_sk#71 AS item#99, (cast(sum(coalesce(sr_return_quantity#79, 0))#95 as decimal(15,4)) / cast(sum(coalesce(ss_quantity#73, 0))#96 as decimal(15,4))) AS return_ratio#100, (cast(sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97 as decimal(15,4)) / cast(sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98 as decimal(15,4))) AS currency_ratio#101] + +(70) Exchange +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(71) Sort [codegen id : 19] +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [return_ratio#100 ASC NULLS FIRST], false, 0 + +(72) Window +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [rank(return_ratio#100) windowspecdefinition(return_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#102], [return_ratio#100 ASC NULLS FIRST] + +(73) Sort [codegen id : 20] +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [currency_ratio#101 ASC NULLS FIRST], false, 0 + +(74) Window +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [rank(currency_ratio#101) windowspecdefinition(currency_ratio#101 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#103], [currency_ratio#101 ASC NULLS FIRST] + +(75) Filter [codegen id : 21] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] +Condition : ((return_rank#102 <= 10) OR (currency_rank#103 <= 10)) + +(76) Project [codegen id : 21] +Output [5]: [store AS channel#104, item#99, return_ratio#100, return_rank#102, currency_rank#103] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] + +(77) Union + +(78) HashAggregate [codegen id : 22] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(79) Exchange +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: hashpartitioning(channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 23] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(81) TakeOrderedAndProject +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: 100, [channel#36 ASC NULLS FIRST, return_rank#34 ASC NULLS FIRST, currency_rank#35 ASC NULLS FIRST], [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..018748b274 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.native_iceberg_compat/simplified.txt @@ -0,0 +1,129 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (22) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (7) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + Filter [ws_net_profit,ws_net_paid,ws_quantity,ws_order_number,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_return_amt,wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (14) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(cr_return_quantity, 0)),sum(coalesce(cs_quantity, 0)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate [cs_item_sk,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_order_number,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(sr_return_quantity, 0)),sum(coalesce(ss_quantity, 0)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [ss_item_sk,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] + Filter [ss_net_profit,ss_net_paid,ss_quantity,ss_ticket_number,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/explain.txt new file mode 100644 index 0000000000..31072e61eb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/explain.txt @@ -0,0 +1,448 @@ +== Physical Plan == +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- * Expand (74) + +- Union (73) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : +- BroadcastExchange (14) + : : +- * Project (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.date_dim (10) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.store (17) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- Union (34) + : : : :- * Project (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (26) + : : : +- * Project (33) + : : : +- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet spark_catalog.default.catalog_returns (30) + : : +- ReusedExchange (35) + : +- BroadcastExchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet spark_catalog.default.catalog_page (38) + +- * HashAggregate (72) + +- Exchange (71) + +- * HashAggregate (70) + +- * Project (69) + +- * BroadcastHashJoin Inner BuildRight (68) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- Union (60) + : : :- * Project (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet spark_catalog.default.web_sales (47) + : : +- * Project (59) + : : +- * BroadcastHashJoin Inner BuildLeft (58) + : : :- BroadcastExchange (53) + : : : +- * ColumnarToRow (52) + : : : +- Scan parquet spark_catalog.default.web_returns (51) + : : +- * Project (57) + : : +- * Filter (56) + : : +- * ColumnarToRow (55) + : : +- Scan parquet spark_catalog.default.web_sales (54) + : +- ReusedExchange (61) + +- BroadcastExchange (67) + +- * Filter (66) + +- * ColumnarToRow (65) + +- Scan parquet spark_catalog.default.web_site (64) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Project [codegen id : 1] +Output [6]: [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(9) Union + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-08-23)) AND (d_date#22 <= 2000-09-06)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(20) BroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [s_store_sk#23] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] + +(24) Exchange +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#33, sum(UnscaledValue(return_amt#9))#34, sum(UnscaledValue(profit#8))#35, sum(UnscaledValue(net_loss#10))#36] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#33,17,2) AS sales#37, MakeDecimal(sum(UnscaledValue(return_amt#9))#34,17,2) AS returns#38, (MakeDecimal(sum(UnscaledValue(profit#8))#35,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#36,17,2)) AS profit#39, store channel AS channel#40, concat(store, s_store_id#24) AS id#41] + +(26) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(28) Filter [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(30) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#55)] +PushedFilters: [IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(32) Filter [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#62] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51] +Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, d_date_sk#62] + +(38) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Condition : isnotnull(cp_catalog_page_sk#63) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#63] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Input [7]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] + +(45) Exchange +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#77, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#78, (MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2)) AS profit#79, catalog channel AS channel#80, concat(catalog_page, cp_catalog_page_id#64) AS id#81] + +(47) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#85)] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(49) Filter [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(51) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#96)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 14] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] + +(53) BroadcastExchange +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [plan_id=6] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(56) Filter +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) + +(57) Project +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#107] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#107] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91] +Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, d_date_sk#107] + +(64) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#108, web_site_id#109] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] +Condition : isnotnull(web_site_sk#108) + +(67) BroadcastExchange +Input [2]: [web_site_sk#108, web_site_id#109] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#108] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Input [7]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#108, web_site_id#109] + +(70) HashAggregate [codegen id : 18] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Keys [1]: [web_site_id#109] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] + +(71) Exchange +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#109, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#109] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#123, (MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2)) AS profit#124, web channel AS channel#125, concat(web_site, web_site_id#109) AS id#126] + +(73) Union + +(74) Expand [codegen id : 20] +Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] +Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] + +(75) HashAggregate [codegen id : 20] +Input [6]: [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [partial_sum(sales#37), partial_sum(returns#38), partial_sum(profit#39)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] + +(76) Exchange +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#127, id#128, spark_grouping_id#129, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(77) HashAggregate [codegen id : 21] +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [sum(sales#37), sum(returns#38), sum(profit#39)] +Aggregate Attributes [3]: [sum(sales#37)#142, sum(returns#38)#143, sum(profit#39)#144] +Results [5]: [channel#127, id#128, sum(sales#37)#142 AS sales#145, sum(returns#38)#143 AS returns#146, sum(profit#39)#144 AS profit#147] + +(78) TakeOrderedAndProject +Input [5]: [channel#127, id#128, sales#145, returns#146, profit#147] +Arguments: 100, [channel#127 ASC NULLS FIRST, id#128 ASC NULLS FIRST], [channel#127, id#128, sales#145, returns#146, profit#147] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9893a05dca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_datafusion/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (21) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (20) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #5 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #7 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..31072e61eb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/explain.txt @@ -0,0 +1,448 @@ +== Physical Plan == +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- * Expand (74) + +- Union (73) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : +- BroadcastExchange (14) + : : +- * Project (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.date_dim (10) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.store (17) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- Union (34) + : : : :- * Project (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (26) + : : : +- * Project (33) + : : : +- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet spark_catalog.default.catalog_returns (30) + : : +- ReusedExchange (35) + : +- BroadcastExchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet spark_catalog.default.catalog_page (38) + +- * HashAggregate (72) + +- Exchange (71) + +- * HashAggregate (70) + +- * Project (69) + +- * BroadcastHashJoin Inner BuildRight (68) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- Union (60) + : : :- * Project (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet spark_catalog.default.web_sales (47) + : : +- * Project (59) + : : +- * BroadcastHashJoin Inner BuildLeft (58) + : : :- BroadcastExchange (53) + : : : +- * ColumnarToRow (52) + : : : +- Scan parquet spark_catalog.default.web_returns (51) + : : +- * Project (57) + : : +- * Filter (56) + : : +- * ColumnarToRow (55) + : : +- Scan parquet spark_catalog.default.web_sales (54) + : +- ReusedExchange (61) + +- BroadcastExchange (67) + +- * Filter (66) + +- * ColumnarToRow (65) + +- Scan parquet spark_catalog.default.web_site (64) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Project [codegen id : 1] +Output [6]: [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(9) Union + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-08-23)) AND (d_date#22 <= 2000-09-06)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(20) BroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [s_store_sk#23] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] + +(24) Exchange +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#33, sum(UnscaledValue(return_amt#9))#34, sum(UnscaledValue(profit#8))#35, sum(UnscaledValue(net_loss#10))#36] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#33,17,2) AS sales#37, MakeDecimal(sum(UnscaledValue(return_amt#9))#34,17,2) AS returns#38, (MakeDecimal(sum(UnscaledValue(profit#8))#35,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#36,17,2)) AS profit#39, store channel AS channel#40, concat(store, s_store_id#24) AS id#41] + +(26) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(28) Filter [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(30) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#55)] +PushedFilters: [IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(32) Filter [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#62] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51] +Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, d_date_sk#62] + +(38) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Condition : isnotnull(cp_catalog_page_sk#63) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#63] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Input [7]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] + +(45) Exchange +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#77, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#78, (MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2)) AS profit#79, catalog channel AS channel#80, concat(catalog_page, cp_catalog_page_id#64) AS id#81] + +(47) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#85)] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(49) Filter [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(51) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#96)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 14] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] + +(53) BroadcastExchange +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [plan_id=6] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(56) Filter +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) + +(57) Project +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#107] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#107] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91] +Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, d_date_sk#107] + +(64) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#108, web_site_id#109] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] +Condition : isnotnull(web_site_sk#108) + +(67) BroadcastExchange +Input [2]: [web_site_sk#108, web_site_id#109] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#108] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Input [7]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#108, web_site_id#109] + +(70) HashAggregate [codegen id : 18] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Keys [1]: [web_site_id#109] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] + +(71) Exchange +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#109, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#109] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#123, (MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2)) AS profit#124, web channel AS channel#125, concat(web_site, web_site_id#109) AS id#126] + +(73) Union + +(74) Expand [codegen id : 20] +Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] +Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] + +(75) HashAggregate [codegen id : 20] +Input [6]: [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [partial_sum(sales#37), partial_sum(returns#38), partial_sum(profit#39)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] + +(76) Exchange +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#127, id#128, spark_grouping_id#129, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(77) HashAggregate [codegen id : 21] +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [sum(sales#37), sum(returns#38), sum(profit#39)] +Aggregate Attributes [3]: [sum(sales#37)#142, sum(returns#38)#143, sum(profit#39)#144] +Results [5]: [channel#127, id#128, sum(sales#37)#142 AS sales#145, sum(returns#38)#143 AS returns#146, sum(profit#39)#144 AS profit#147] + +(78) TakeOrderedAndProject +Input [5]: [channel#127, id#128, sales#145, returns#146, profit#147] +Arguments: 100, [channel#127 ASC NULLS FIRST, id#128 ASC NULLS FIRST], [channel#127, id#128, sales#145, returns#146, profit#147] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9893a05dca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.native_iceberg_compat/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (21) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (20) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #5 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #7 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/explain.txt new file mode 100644 index 0000000000..7214b3b58e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/explain.txt @@ -0,0 +1,189 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.store (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet spark_catalog.default.date_dim (22) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#9)] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(6) Filter [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : ((isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#6)) AND isnotnull(sr_customer_sk#7)) + +(7) BroadcastExchange +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(input[2, int, false], input[0, int, false], input[1, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [3]: [ss_ticket_number#4, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [sr_ticket_number#8, sr_item_sk#6, sr_customer_sk#7] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(10) Scan parquet spark_catalog.default.store +Output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(12) Filter [codegen id : 2] +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Condition : isnotnull(s_store_sk#10) + +(13) BroadcastExchange +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(16) Scan parquet spark_catalog.default.date_dim +Output [1]: [d_date_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [1]: [d_date_sk#21] + +(18) Filter [codegen id : 3] +Input [1]: [d_date_sk#21] +Condition : isnotnull(d_date_sk#21) + +(19) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#21] + +(22) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : ((((isnotnull(d_year#23) AND isnotnull(d_moy#24)) AND (d_year#23 = 2001)) AND (d_moy#24 = 8)) AND isnotnull(d_date_sk#22)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(26) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#9] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#22] + +(29) HashAggregate [codegen id : 5] +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#25, sum#26, sum#27, sum#28, sum#29] +Results [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] + +(30) Exchange +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 6] +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#35, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#36, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#37, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#38, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#39] +Results [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#35 AS 30 days #40, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#36 AS 31 - 60 days #41, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#37 AS 61 - 90 days #42, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#38 AS 91 - 120 days #43, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#39 AS >120 days #44] + +(32) TakeOrderedAndProject +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #40, 31 - 60 days #41, 61 - 90 days #42, 91 - 120 days #43, >120 days #44] +Arguments: 100, [s_store_name#11 ASC NULLS FIRST, s_company_id#12 ASC NULLS FIRST, s_street_number#13 ASC NULLS FIRST, s_street_name#14 ASC NULLS FIRST, s_street_type#15 ASC NULLS FIRST, s_suite_number#16 ASC NULLS FIRST, s_city#17 ASC NULLS FIRST, s_county#18 ASC NULLS FIRST, s_state#19 ASC NULLS FIRST, s_zip#20 ASC NULLS FIRST], [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #40, 31 - 60 days #41, 61 - 90 days #42, 91 - 120 days #43, >120 days #44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/simplified.txt new file mode 100644 index 0000000000..0108f64701 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 30) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 60) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 90) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 + WholeStageCodegen (5) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,ss_customer_sk,sr_ticket_number,sr_item_sk,sr_customer_sk] + Filter [ss_ticket_number,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7214b3b58e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/explain.txt @@ -0,0 +1,189 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.store_returns (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.store (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet spark_catalog.default.date_dim (22) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#9)] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(6) Filter [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : ((isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#6)) AND isnotnull(sr_customer_sk#7)) + +(7) BroadcastExchange +Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(input[2, int, false], input[0, int, false], input[1, int, false]),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [3]: [ss_ticket_number#4, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [sr_ticket_number#8, sr_item_sk#6, sr_customer_sk#7] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(10) Scan parquet spark_catalog.default.store +Output [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(12) Filter [codegen id : 2] +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Condition : isnotnull(s_store_sk#10) + +(13) BroadcastExchange +Input [11]: [s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#10, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] + +(16) Scan parquet spark_catalog.default.date_dim +Output [1]: [d_date_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [1]: [d_date_sk#21] + +(18) Filter [codegen id : 3] +Input [1]: [d_date_sk#21] +Condition : isnotnull(d_date_sk#21) + +(19) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#21] + +(22) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : ((((isnotnull(d_year#23) AND isnotnull(d_moy#24)) AND (d_year#23 = 2001)) AND (d_moy#24 = 8)) AND isnotnull(d_date_sk#22)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(26) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#9] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, d_date_sk#22] + +(29) HashAggregate [codegen id : 5] +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#25, sum#26, sum#27, sum#28, sum#29] +Results [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] + +(30) Exchange +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 6] +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum#30, sum#31, sum#32, sum#33, sum#34] +Keys [10]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#35, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#36, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#37, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#38, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#39] +Results [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#35 AS 30 days #40, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#36 AS 31 - 60 days #41, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#37 AS 61 - 90 days #42, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#38 AS 91 - 120 days #43, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#39 AS >120 days #44] + +(32) TakeOrderedAndProject +Input [15]: [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #40, 31 - 60 days #41, 61 - 90 days #42, 91 - 120 days #43, >120 days #44] +Arguments: 100, [s_store_name#11 ASC NULLS FIRST, s_company_id#12 ASC NULLS FIRST, s_street_number#13 ASC NULLS FIRST, s_street_name#14 ASC NULLS FIRST, s_street_type#15 ASC NULLS FIRST, s_suite_number#16 ASC NULLS FIRST, s_city#17 ASC NULLS FIRST, s_county#18 ASC NULLS FIRST, s_state#19 ASC NULLS FIRST, s_zip#20 ASC NULLS FIRST], [s_store_name#11, s_company_id#12, s_street_number#13, s_street_name#14, s_street_type#15, s_suite_number#16, s_city#17, s_county#18, s_state#19, s_zip#20, 30 days #40, 31 - 60 days #41, 61 - 90 days #42, 91 - 120 days #43, >120 days #44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..0108f64701 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 30) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 60) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_sk - ss_sold_date_sk) > 90) AND ((sr_returned_date_sk - ss_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_sk - ss_sold_date_sk) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 + WholeStageCodegen (5) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,ss_customer_sk,sr_ticket_number,sr_item_sk,sr_customer_sk] + Filter [ss_ticket_number,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/explain.txt new file mode 100644 index 0000000000..3bb6c1d637 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/explain.txt @@ -0,0 +1,233 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * Filter (40) + +- Window (39) + +- * Sort (38) + +- Exchange (37) + +- * Project (36) + +- * SortMergeJoin FullOuter (35) + :- * Sort (19) + : +- Exchange (18) + : +- * Project (17) + : +- Window (16) + : +- * Sort (15) + : +- Exchange (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.date_dim (4) + +- * Sort (34) + +- Exchange (33) + +- * Project (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet spark_catalog.default.store_sales (20) + +- ReusedExchange (23) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [ws_item_sk#1, d_date#5, sum#8] + +(12) Exchange +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS _w0#11, ws_item_sk#1] + +(14) Exchange +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: [sum(_w0#11) windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 5] +Output [3]: [item_sk#10, d_date#5, cume_sales#12] +Input [5]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1, cume_sales#12] + +(18) Exchange +Input [3]: [item_sk#10, d_date#5, cume_sales#12] +Arguments: hashpartitioning(item_sk#10, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(19) Sort [codegen id : 6] +Input [3]: [item_sk#10, d_date#5, cume_sales#12] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(20) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#15)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] + +(22) Filter [codegen id : 8] +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_item_sk#13) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#16, d_date#17] + +(24) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 8] +Output [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Input [5]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_date#17] + +(26) HashAggregate [codegen id : 8] +Input [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ss_item_sk#13, d_date#17, sum#19] + +(27) Exchange +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Arguments: hashpartitioning(ss_item_sk#13, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) HashAggregate [codegen id : 9] +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#20] +Results [4]: [ss_item_sk#13 AS item_sk#21, d_date#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#20,17,2) AS _w0#22, ss_item_sk#13] + +(29) Exchange +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: hashpartitioning(ss_item_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(30) Sort [codegen id : 10] +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [ss_item_sk#13 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 + +(31) Window +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [sum(_w0#22) windowspecdefinition(ss_item_sk#13, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#23], [ss_item_sk#13], [d_date#17 ASC NULLS FIRST] + +(32) Project [codegen id : 11] +Output [3]: [item_sk#21, d_date#17, cume_sales#23] +Input [5]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13, cume_sales#23] + +(33) Exchange +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: hashpartitioning(item_sk#21, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(34) Sort [codegen id : 12] +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: [item_sk#21 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin [codegen id : 13] +Left keys [2]: [item_sk#10, d_date#5] +Right keys [2]: [item_sk#21, d_date#17] +Join type: FullOuter +Join condition: None + +(36) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#21 END AS item_sk#24, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#17 END AS d_date#25, cume_sales#12 AS web_sales#26, cume_sales#23 AS store_sales#27] +Input [6]: [item_sk#10, d_date#5, cume_sales#12, item_sk#21, d_date#17, cume_sales#23] + +(37) Exchange +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: hashpartitioning(item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(38) Sort [codegen id : 14] +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], false, 0 + +(39) Window +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [max(web_sales#26) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#28, max(store_sales#27) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#29], [item_sk#24], [d_date#25 ASC NULLS FIRST] + +(40) Filter [codegen id : 15] +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Condition : ((isnotnull(web_cumulative#28) AND isnotnull(store_cumulative#29)) AND (web_cumulative#28 > store_cumulative#29)) + +(41) TakeOrderedAndProject +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Arguments: 100, [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/simplified.txt new file mode 100644 index 0000000000..dd597f4e7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_datafusion/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (15) + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen (14) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (13) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (5) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen (4) + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #4 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #6 + WholeStageCodegen (11) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ss_item_sk,d_date] + WholeStageCodegen (10) + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen (9) + HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #8 + WholeStageCodegen (8) + HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3bb6c1d637 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/explain.txt @@ -0,0 +1,233 @@ +== Physical Plan == +TakeOrderedAndProject (41) ++- * Filter (40) + +- Window (39) + +- * Sort (38) + +- Exchange (37) + +- * Project (36) + +- * SortMergeJoin FullOuter (35) + :- * Sort (19) + : +- Exchange (18) + : +- * Project (17) + : +- Window (16) + : +- * Sort (15) + : +- Exchange (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.date_dim (4) + +- * Sort (34) + +- Exchange (33) + +- * Project (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet spark_catalog.default.store_sales (20) + +- ReusedExchange (23) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [ws_item_sk#1, d_date#5, sum#8] + +(12) Exchange +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS _w0#11, ws_item_sk#1] + +(14) Exchange +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1] +Arguments: [sum(_w0#11) windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 5] +Output [3]: [item_sk#10, d_date#5, cume_sales#12] +Input [5]: [item_sk#10, d_date#5, _w0#11, ws_item_sk#1, cume_sales#12] + +(18) Exchange +Input [3]: [item_sk#10, d_date#5, cume_sales#12] +Arguments: hashpartitioning(item_sk#10, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(19) Sort [codegen id : 6] +Input [3]: [item_sk#10, d_date#5, cume_sales#12] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(20) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#15)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] + +(22) Filter [codegen id : 8] +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_item_sk#13) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#16, d_date#17] + +(24) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 8] +Output [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Input [5]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_date#17] + +(26) HashAggregate [codegen id : 8] +Input [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ss_item_sk#13, d_date#17, sum#19] + +(27) Exchange +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Arguments: hashpartitioning(ss_item_sk#13, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) HashAggregate [codegen id : 9] +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#20] +Results [4]: [ss_item_sk#13 AS item_sk#21, d_date#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#20,17,2) AS _w0#22, ss_item_sk#13] + +(29) Exchange +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: hashpartitioning(ss_item_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(30) Sort [codegen id : 10] +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [ss_item_sk#13 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 + +(31) Window +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [sum(_w0#22) windowspecdefinition(ss_item_sk#13, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#23], [ss_item_sk#13], [d_date#17 ASC NULLS FIRST] + +(32) Project [codegen id : 11] +Output [3]: [item_sk#21, d_date#17, cume_sales#23] +Input [5]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13, cume_sales#23] + +(33) Exchange +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: hashpartitioning(item_sk#21, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(34) Sort [codegen id : 12] +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: [item_sk#21 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin [codegen id : 13] +Left keys [2]: [item_sk#10, d_date#5] +Right keys [2]: [item_sk#21, d_date#17] +Join type: FullOuter +Join condition: None + +(36) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#21 END AS item_sk#24, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#17 END AS d_date#25, cume_sales#12 AS web_sales#26, cume_sales#23 AS store_sales#27] +Input [6]: [item_sk#10, d_date#5, cume_sales#12, item_sk#21, d_date#17, cume_sales#23] + +(37) Exchange +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: hashpartitioning(item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(38) Sort [codegen id : 14] +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], false, 0 + +(39) Window +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [max(web_sales#26) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#28, max(store_sales#27) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#29], [item_sk#24], [d_date#25 ASC NULLS FIRST] + +(40) Filter [codegen id : 15] +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Condition : ((isnotnull(web_cumulative#28) AND isnotnull(store_cumulative#29)) AND (web_cumulative#28 > store_cumulative#29)) + +(41) TakeOrderedAndProject +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Arguments: 100, [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..dd597f4e7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.native_iceberg_compat/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (15) + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen (14) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (13) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (5) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen (4) + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #4 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #6 + WholeStageCodegen (11) + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ss_item_sk,d_date] + WholeStageCodegen (10) + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen (9) + HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #8 + WholeStageCodegen (8) + HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/explain.txt new file mode 100644 index 0000000000..2cb91028d9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] + +(19) Exchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#14, brand#15, ext_price#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, ext_price#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/simplified.txt new file mode 100644 index 0000000000..16e313682b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,ext_price,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2cb91028d9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] + +(19) Exchange +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#14, brand#15, ext_price#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, ext_price#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..16e313682b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [d_year,ext_price,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/explain.txt new file mode 100644 index 0000000000..dea00fce8d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet spark_catalog.default.store (18) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books ,Children ,Electronics ]),In(i_class, [personal ,portable ,reference ,self-help ])),In(i_brand, [exportiunivamalg #6 ,scholaramalgamalg #7 ,scholaramalgamalg #8 ,scholaramalgamalg #6 ])),And(And(In(i_category, [Men ,Music ,Women ]),In(i_class, [accessories ,classical ,fragrances ,pants ])),In(i_brand, [amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,reference ,self-help )) AND i_brand#2 IN (scholaramalgamalg #7 ,scholaramalgamalg #8 ,exportiunivamalg #6 ,scholaramalgamalg #6 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(5) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(8) BroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(11) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#14, d_qoy#16] +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] + +(15) BroadcastExchange +Input [2]: [d_date_sk#14, d_qoy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_qoy#16] + +(18) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#17] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(21) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#17] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#16, sum#19] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN ((abs((sum_sales#21 - avg_quarterly_sales#23)) / avg_quarterly_sales#23) > 0.1000000000000000) ELSE false END + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3fa8af3ff4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen (7) + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (6) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (5) + HashAggregate [i_manufact_id,d_qoy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manufact_id,d_qoy] #2 + WholeStageCodegen (4) + HashAggregate [i_manufact_id,d_qoy,ss_sales_price] [sum,sum] + Project [i_manufact_id,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manufact_id] + Filter [i_category,i_class,i_brand,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..dea00fce8d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet spark_catalog.default.store (18) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books ,Children ,Electronics ]),In(i_class, [personal ,portable ,reference ,self-help ])),In(i_brand, [exportiunivamalg #6 ,scholaramalgamalg #7 ,scholaramalgamalg #8 ,scholaramalgamalg #6 ])),And(And(In(i_category, [Men ,Music ,Women ]),In(i_class, [accessories ,classical ,fragrances ,pants ])),In(i_brand, [amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,reference ,self-help )) AND i_brand#2 IN (scholaramalgamalg #7 ,scholaramalgamalg #8 ,exportiunivamalg #6 ,scholaramalgamalg #6 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(5) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(8) BroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(11) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#14, d_qoy#16] +Input [3]: [d_date_sk#14, d_month_seq#15, d_qoy#16] + +(15) BroadcastExchange +Input [2]: [d_date_sk#14, d_qoy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_qoy#16] + +(18) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#17] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(21) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#17] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#16, sum#19] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN ((abs((sum_sales#21 - avg_quarterly_sales#23)) / avg_quarterly_sales#23) > 0.1000000000000000) ELSE false END + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3fa8af3ff4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen (7) + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen (6) + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen (5) + HashAggregate [i_manufact_id,d_qoy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manufact_id,d_qoy] #2 + WholeStageCodegen (4) + HashAggregate [i_manufact_id,d_qoy,ss_sales_price] [sum,sum] + Project [i_manufact_id,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manufact_id] + Filter [i_category,i_class,i_brand,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/explain.txt new file mode 100644 index 0000000000..7d04d75608 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/explain.txt @@ -0,0 +1,469 @@ +== Physical Plan == +TakeOrderedAndProject (64) ++- * HashAggregate (63) + +- Exchange (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Project (44) + : : +- * BroadcastHashJoin Inner BuildRight (43) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * HashAggregate (32) + : : : : +- Exchange (31) + : : : : +- * HashAggregate (30) + : : : : +- * Project (29) + : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : :- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (16) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : : :- Union (9) + : : : : : : : :- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : +- * Project (8) + : : : : : : : +- * Filter (7) + : : : : : : : +- * ColumnarToRow (6) + : : : : : : : +- Scan parquet spark_catalog.default.web_sales (5) + : : : : : : +- BroadcastExchange (14) + : : : : : : +- * Project (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Project (20) + : : : : : +- * Filter (19) + : : : : : +- * ColumnarToRow (18) + : : : : : +- Scan parquet spark_catalog.default.date_dim (17) + : : : : +- BroadcastExchange (27) + : : : : +- * Filter (26) + : : : : +- * ColumnarToRow (25) + : : : : +- Scan parquet spark_catalog.default.customer (24) + : : : +- BroadcastExchange (36) + : : : +- * Filter (35) + : : : +- * ColumnarToRow (34) + : : : +- Scan parquet spark_catalog.default.store_sales (33) + : : +- BroadcastExchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet spark_catalog.default.customer_address (39) + : +- BroadcastExchange (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet spark_catalog.default.store (45) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet spark_catalog.default.date_dim (51) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_bill_customer_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [cs_sold_date_sk#3 AS sold_date_sk#4, cs_bill_customer_sk#1 AS customer_sk#5, cs_item_sk#2 AS item_sk#6] +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] + +(5) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#9)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] + +(7) Filter [codegen id : 2] +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Condition : (isnotnull(ws_item_sk#7) AND isnotnull(ws_bill_customer_sk#8)) + +(8) Project [codegen id : 2] +Output [3]: [ws_sold_date_sk#9 AS sold_date_sk#10, ws_bill_customer_sk#8 AS customer_sk#11, ws_item_sk#7 AS item_sk#12] +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] + +(9) Union + +(10) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women ), EqualTo(i_class,maternity ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(12) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women )) AND (i_class#14 = maternity )) AND isnotnull(i_item_sk#13)) + +(13) Project [codegen id : 3] +Output [1]: [i_item_sk#13] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(14) BroadcastExchange +Input [1]: [i_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [item_sk#6] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 6] +Output [2]: [sold_date_sk#4, customer_sk#5] +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] + +(17) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(19) Filter [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 12)) AND (d_year#17 = 1998)) AND isnotnull(d_date_sk#16)) + +(20) Project [codegen id : 4] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(21) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sold_date_sk#4] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 6] +Output [1]: [customer_sk#5] +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#16] + +(24) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 5] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(26) Filter [codegen id : 5] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) + +(27) BroadcastExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [customer_sk#5] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Input [3]: [customer_sk#5, c_customer_sk#19, c_current_addr_sk#20] + +(30) HashAggregate [codegen id : 6] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(31) Exchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: hashpartitioning(c_customer_sk#19, c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(32) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(33) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#23)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(35) Filter [codegen id : 7] +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#21) + +(36) BroadcastExchange +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_customer_sk#19] +Right keys [1]: [ss_customer_sk#21] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [4]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23] +Input [5]: [c_customer_sk#19, c_current_addr_sk#20, ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(39) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] + +(41) Filter [codegen id : 8] +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Condition : ((isnotnull(ca_address_sk#24) AND isnotnull(ca_county#25)) AND isnotnull(ca_state#26)) + +(42) BroadcastExchange +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [5]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26] +Input [7]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_address_sk#24, ca_county#25, ca_state#26] + +(45) Scan parquet spark_catalog.default.store +Output [2]: [s_county#27, s_state#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 9] +Input [2]: [s_county#27, s_state#28] + +(47) Filter [codegen id : 9] +Input [2]: [s_county#27, s_state#28] +Condition : (isnotnull(s_county#27) AND isnotnull(s_state#28)) + +(48) BroadcastExchange +Input [2]: [s_county#27, s_state#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [plan_id=7] + +(49) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [ca_county#25, ca_state#26] +Right keys [2]: [s_county#27, s_state#28] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 11] +Output [3]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23] +Input [7]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26, s_county#27, s_state#28] + +(51) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#29, d_month_seq#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#29, d_month_seq#30] + +(53) Filter [codegen id : 10] +Input [2]: [d_date_sk#29, d_month_seq#30] +Condition : (((isnotnull(d_month_seq#30) AND (d_month_seq#30 >= Subquery scalar-subquery#31, [id=#32])) AND (d_month_seq#30 <= Subquery scalar-subquery#33, [id=#34])) AND isnotnull(d_date_sk#29)) + +(54) Project [codegen id : 10] +Output [1]: [d_date_sk#29] +Input [2]: [d_date_sk#29, d_month_seq#30] + +(55) BroadcastExchange +Input [1]: [d_date_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(56) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#29] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 11] +Output [2]: [c_customer_sk#19, ss_ext_sales_price#22] +Input [4]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, d_date_sk#29] + +(58) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#19, ss_ext_sales_price#22] +Keys [1]: [c_customer_sk#19] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#22))] +Aggregate Attributes [1]: [sum#35] +Results [2]: [c_customer_sk#19, sum#36] + +(59) Exchange +Input [2]: [c_customer_sk#19, sum#36] +Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(60) HashAggregate [codegen id : 12] +Input [2]: [c_customer_sk#19, sum#36] +Keys [1]: [c_customer_sk#19] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#22))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#22))#37] +Results [1]: [cast((MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#22))#37,17,2) / 50) as int) AS segment#38] + +(61) HashAggregate [codegen id : 12] +Input [1]: [segment#38] +Keys [1]: [segment#38] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#39] +Results [2]: [segment#38, count#40] + +(62) Exchange +Input [2]: [segment#38, count#40] +Arguments: hashpartitioning(segment#38, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(63) HashAggregate [codegen id : 13] +Input [2]: [segment#38, count#40] +Keys [1]: [segment#38] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [3]: [segment#38, count(1)#41 AS num_customers#42, (segment#38 * 50) AS segment_base#43] + +(64) TakeOrderedAndProject +Input [3]: [segment#38, num_customers#42, segment_base#43] +Arguments: 100, [segment#38 ASC NULLS FIRST, num_customers#42 ASC NULLS FIRST], [segment#38, num_customers#42, segment_base#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#31, [id=#32] +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- * Project (68) + +- * Filter (67) + +- * ColumnarToRow (66) + +- Scan parquet spark_catalog.default.date_dim (65) + + +(65) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#44, d_year#45, d_moy#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] + +(67) Filter [codegen id : 1] +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] +Condition : (((isnotnull(d_year#45) AND isnotnull(d_moy#46)) AND (d_year#45 = 1998)) AND (d_moy#46 = 12)) + +(68) Project [codegen id : 1] +Output [1]: [(d_month_seq#44 + 1) AS (d_month_seq + 1)#47] +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] + +(69) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#47] +Keys [1]: [(d_month_seq + 1)#47] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#47] + +(70) Exchange +Input [1]: [(d_month_seq + 1)#47] +Arguments: hashpartitioning((d_month_seq + 1)#47, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(71) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#47] +Keys [1]: [(d_month_seq + 1)#47] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#47] + +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#33, [id=#34] +* HashAggregate (78) ++- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- * Filter (74) + +- * ColumnarToRow (73) + +- Scan parquet spark_catalog.default.date_dim (72) + + +(72) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#48, d_year#49, d_moy#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#48, d_year#49, d_moy#50] + +(74) Filter [codegen id : 1] +Input [3]: [d_month_seq#48, d_year#49, d_moy#50] +Condition : (((isnotnull(d_year#49) AND isnotnull(d_moy#50)) AND (d_year#49 = 1998)) AND (d_moy#50 = 12)) + +(75) Project [codegen id : 1] +Output [1]: [(d_month_seq#48 + 3) AS (d_month_seq + 3)#51] +Input [3]: [d_month_seq#48, d_year#49, d_moy#50] + +(76) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#51] +Keys [1]: [(d_month_seq + 3)#51] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#51] + +(77) Exchange +Input [1]: [(d_month_seq + 3)#51] +Arguments: hashpartitioning((d_month_seq + 3)#51, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(78) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#51] +Keys [1]: [(d_month_seq + 3)#51] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#51] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8f25066aa4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_datafusion/simplified.txt @@ -0,0 +1,121 @@ +TakeOrderedAndProject [segment,num_customers,segment_base] + WholeStageCodegen (13) + HashAggregate [segment,count] [count(1),num_customers,segment_base,count] + InputAdapter + Exchange [segment] #1 + WholeStageCodegen (12) + HashAggregate [segment] [count,count] + HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] + InputAdapter + Exchange [c_customer_sk] #2 + WholeStageCodegen (11) + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ca_county,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + InputAdapter + Exchange [c_customer_sk,c_current_addr_sk] #3 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + BroadcastHashJoin [customer_sk,c_customer_sk] + Project [customer_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + Project [sold_date_sk,customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + WholeStageCodegen (2) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [s_county,s_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_county,s_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (10) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7d04d75608 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/explain.txt @@ -0,0 +1,469 @@ +== Physical Plan == +TakeOrderedAndProject (64) ++- * HashAggregate (63) + +- Exchange (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Project (44) + : : +- * BroadcastHashJoin Inner BuildRight (43) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * HashAggregate (32) + : : : : +- Exchange (31) + : : : : +- * HashAggregate (30) + : : : : +- * Project (29) + : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : :- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (16) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : : :- Union (9) + : : : : : : : :- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : +- * Project (8) + : : : : : : : +- * Filter (7) + : : : : : : : +- * ColumnarToRow (6) + : : : : : : : +- Scan parquet spark_catalog.default.web_sales (5) + : : : : : : +- BroadcastExchange (14) + : : : : : : +- * Project (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Project (20) + : : : : : +- * Filter (19) + : : : : : +- * ColumnarToRow (18) + : : : : : +- Scan parquet spark_catalog.default.date_dim (17) + : : : : +- BroadcastExchange (27) + : : : : +- * Filter (26) + : : : : +- * ColumnarToRow (25) + : : : : +- Scan parquet spark_catalog.default.customer (24) + : : : +- BroadcastExchange (36) + : : : +- * Filter (35) + : : : +- * ColumnarToRow (34) + : : : +- Scan parquet spark_catalog.default.store_sales (33) + : : +- BroadcastExchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet spark_catalog.default.customer_address (39) + : +- BroadcastExchange (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet spark_catalog.default.store (45) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet spark_catalog.default.date_dim (51) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_bill_customer_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [cs_sold_date_sk#3 AS sold_date_sk#4, cs_bill_customer_sk#1 AS customer_sk#5, cs_item_sk#2 AS item_sk#6] +Input [3]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_sold_date_sk#3] + +(5) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#9)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] + +(7) Filter [codegen id : 2] +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] +Condition : (isnotnull(ws_item_sk#7) AND isnotnull(ws_bill_customer_sk#8)) + +(8) Project [codegen id : 2] +Output [3]: [ws_sold_date_sk#9 AS sold_date_sk#10, ws_bill_customer_sk#8 AS customer_sk#11, ws_item_sk#7 AS item_sk#12] +Input [3]: [ws_item_sk#7, ws_bill_customer_sk#8, ws_sold_date_sk#9] + +(9) Union + +(10) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women ), EqualTo(i_class,maternity ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(12) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women )) AND (i_class#14 = maternity )) AND isnotnull(i_item_sk#13)) + +(13) Project [codegen id : 3] +Output [1]: [i_item_sk#13] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(14) BroadcastExchange +Input [1]: [i_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [item_sk#6] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 6] +Output [2]: [sold_date_sk#4, customer_sk#5] +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] + +(17) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(19) Filter [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((((isnotnull(d_moy#18) AND isnotnull(d_year#17)) AND (d_moy#18 = 12)) AND (d_year#17 = 1998)) AND isnotnull(d_date_sk#16)) + +(20) Project [codegen id : 4] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(21) BroadcastExchange +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sold_date_sk#4] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 6] +Output [1]: [customer_sk#5] +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#16] + +(24) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 5] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(26) Filter [codegen id : 5] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) + +(27) BroadcastExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [customer_sk#5] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Input [3]: [customer_sk#5, c_customer_sk#19, c_current_addr_sk#20] + +(30) HashAggregate [codegen id : 6] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(31) Exchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: hashpartitioning(c_customer_sk#19, c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(32) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(33) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#23)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(35) Filter [codegen id : 7] +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#21) + +(36) BroadcastExchange +Input [3]: [ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_customer_sk#19] +Right keys [1]: [ss_customer_sk#21] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [4]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23] +Input [5]: [c_customer_sk#19, c_current_addr_sk#20, ss_customer_sk#21, ss_ext_sales_price#22, ss_sold_date_sk#23] + +(39) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] + +(41) Filter [codegen id : 8] +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Condition : ((isnotnull(ca_address_sk#24) AND isnotnull(ca_county#25)) AND isnotnull(ca_state#26)) + +(42) BroadcastExchange +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [5]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26] +Input [7]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_address_sk#24, ca_county#25, ca_state#26] + +(45) Scan parquet spark_catalog.default.store +Output [2]: [s_county#27, s_state#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 9] +Input [2]: [s_county#27, s_state#28] + +(47) Filter [codegen id : 9] +Input [2]: [s_county#27, s_state#28] +Condition : (isnotnull(s_county#27) AND isnotnull(s_state#28)) + +(48) BroadcastExchange +Input [2]: [s_county#27, s_state#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [plan_id=7] + +(49) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [ca_county#25, ca_state#26] +Right keys [2]: [s_county#27, s_state#28] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 11] +Output [3]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23] +Input [7]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, ca_county#25, ca_state#26, s_county#27, s_state#28] + +(51) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#29, d_month_seq#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#29, d_month_seq#30] + +(53) Filter [codegen id : 10] +Input [2]: [d_date_sk#29, d_month_seq#30] +Condition : (((isnotnull(d_month_seq#30) AND (d_month_seq#30 >= Subquery scalar-subquery#31, [id=#32])) AND (d_month_seq#30 <= Subquery scalar-subquery#33, [id=#34])) AND isnotnull(d_date_sk#29)) + +(54) Project [codegen id : 10] +Output [1]: [d_date_sk#29] +Input [2]: [d_date_sk#29, d_month_seq#30] + +(55) BroadcastExchange +Input [1]: [d_date_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(56) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#29] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 11] +Output [2]: [c_customer_sk#19, ss_ext_sales_price#22] +Input [4]: [c_customer_sk#19, ss_ext_sales_price#22, ss_sold_date_sk#23, d_date_sk#29] + +(58) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#19, ss_ext_sales_price#22] +Keys [1]: [c_customer_sk#19] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#22))] +Aggregate Attributes [1]: [sum#35] +Results [2]: [c_customer_sk#19, sum#36] + +(59) Exchange +Input [2]: [c_customer_sk#19, sum#36] +Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(60) HashAggregate [codegen id : 12] +Input [2]: [c_customer_sk#19, sum#36] +Keys [1]: [c_customer_sk#19] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#22))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#22))#37] +Results [1]: [cast((MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#22))#37,17,2) / 50) as int) AS segment#38] + +(61) HashAggregate [codegen id : 12] +Input [1]: [segment#38] +Keys [1]: [segment#38] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#39] +Results [2]: [segment#38, count#40] + +(62) Exchange +Input [2]: [segment#38, count#40] +Arguments: hashpartitioning(segment#38, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(63) HashAggregate [codegen id : 13] +Input [2]: [segment#38, count#40] +Keys [1]: [segment#38] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [3]: [segment#38, count(1)#41 AS num_customers#42, (segment#38 * 50) AS segment_base#43] + +(64) TakeOrderedAndProject +Input [3]: [segment#38, num_customers#42, segment_base#43] +Arguments: 100, [segment#38 ASC NULLS FIRST, num_customers#42 ASC NULLS FIRST], [segment#38, num_customers#42, segment_base#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#31, [id=#32] +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- * Project (68) + +- * Filter (67) + +- * ColumnarToRow (66) + +- Scan parquet spark_catalog.default.date_dim (65) + + +(65) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#44, d_year#45, d_moy#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] + +(67) Filter [codegen id : 1] +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] +Condition : (((isnotnull(d_year#45) AND isnotnull(d_moy#46)) AND (d_year#45 = 1998)) AND (d_moy#46 = 12)) + +(68) Project [codegen id : 1] +Output [1]: [(d_month_seq#44 + 1) AS (d_month_seq + 1)#47] +Input [3]: [d_month_seq#44, d_year#45, d_moy#46] + +(69) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#47] +Keys [1]: [(d_month_seq + 1)#47] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#47] + +(70) Exchange +Input [1]: [(d_month_seq + 1)#47] +Arguments: hashpartitioning((d_month_seq + 1)#47, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(71) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#47] +Keys [1]: [(d_month_seq + 1)#47] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#47] + +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#33, [id=#34] +* HashAggregate (78) ++- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- * Filter (74) + +- * ColumnarToRow (73) + +- Scan parquet spark_catalog.default.date_dim (72) + + +(72) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#48, d_year#49, d_moy#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#48, d_year#49, d_moy#50] + +(74) Filter [codegen id : 1] +Input [3]: [d_month_seq#48, d_year#49, d_moy#50] +Condition : (((isnotnull(d_year#49) AND isnotnull(d_moy#50)) AND (d_year#49 = 1998)) AND (d_moy#50 = 12)) + +(75) Project [codegen id : 1] +Output [1]: [(d_month_seq#48 + 3) AS (d_month_seq + 3)#51] +Input [3]: [d_month_seq#48, d_year#49, d_moy#50] + +(76) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#51] +Keys [1]: [(d_month_seq + 3)#51] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#51] + +(77) Exchange +Input [1]: [(d_month_seq + 3)#51] +Arguments: hashpartitioning((d_month_seq + 3)#51, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(78) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#51] +Keys [1]: [(d_month_seq + 3)#51] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#51] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8f25066aa4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.native_iceberg_compat/simplified.txt @@ -0,0 +1,121 @@ +TakeOrderedAndProject [segment,num_customers,segment_base] + WholeStageCodegen (13) + HashAggregate [segment,count] [count(1),num_customers,segment_base,count] + InputAdapter + Exchange [segment] #1 + WholeStageCodegen (12) + HashAggregate [segment] [count,count] + HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] + InputAdapter + Exchange [c_customer_sk] #2 + WholeStageCodegen (11) + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ca_county,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + InputAdapter + Exchange [c_customer_sk,c_current_addr_sk] #3 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + BroadcastHashJoin [customer_sk,c_customer_sk] + Project [customer_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + Project [sold_date_sk,customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + WholeStageCodegen (2) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Filter [s_county,s_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_county,s_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (10) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/explain.txt new file mode 100644 index 0000000000..ea07fa708f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_item_sk#4, ss_ext_sales_price#5] +Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 28)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_brand#9, i_brand_id#8, sum#12] + +(19) Exchange +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [3]: [i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#14, brand#15, ext_price#16] +Arguments: 100, [ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [brand_id#14, brand#15, ext_price#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c5eb0a9649 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [ext_price,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ea07fa708f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet spark_catalog.default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) + +(8) BroadcastExchange +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_item_sk#4, ss_ext_sales_price#5] +Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 28)) AND isnotnull(i_item_sk#7)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] + +(15) BroadcastExchange +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_brand#9, i_brand_id#8, sum#12] + +(19) Exchange +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Keys [2]: [i_brand#9, i_brand_id#8] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [3]: [i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#14, brand#15, ext_price#16] +Arguments: 100, [ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [brand_id#14, brand#15, ext_price#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c5eb0a9649 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [ext_price,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/explain.txt new file mode 100644 index 0000000000..66859a80d7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet spark_catalog.default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(21) Scan parquet spark_catalog.default.item +Output [2]: [i_item_id#12, i_color#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_color, [blanched ,burnished ,slate ])] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#12, i_color#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#12, i_color#13] +Condition : i_color#13 IN (slate ,blanched ,burnished ) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#12] +Input [2]: [i_item_id#12, i_color#13] + +(25) BroadcastExchange +Input [1]: [i_item_id#12] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#11] +Right keys [1]: [i_item_id#12] +Join type: LeftSemi +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#10] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#3, i_item_id#11] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_item_id#11, sum#15] + +(31) Exchange +Input [2]: [i_item_id#11, sum#15] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#11, sum#15] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#21)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(35) Filter [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#23] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#18] +Right keys [1]: [ca_address_sk#23] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#24, i_item_id#25] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#20, i_item_id#25] +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_item_id#25] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] +Keys [1]: [i_item_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_item_id#25, sum#27] + +(46) Exchange +Input [2]: [i_item_id#25, sum#27] +Arguments: hashpartitioning(i_item_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#25, sum#27] +Keys [1]: [i_item_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(48) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] + +(50) Filter [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_bill_addr_sk#31) AND isnotnull(ws_item_sk#30)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#34] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#34] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32] +Input [5]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#35] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#31] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#30, ws_ext_sales_price#32] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ca_address_sk#35] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#36, i_item_id#37] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#30] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#32, i_item_id#37] +Input [4]: [ws_item_sk#30, ws_ext_sales_price#32, i_item_sk#36, i_item_id#37] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#32, i_item_id#37] +Keys [1]: [i_item_id#37] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum#38] +Results [2]: [i_item_id#37, sum#39] + +(61) Exchange +Input [2]: [i_item_id#37, sum#39] +Arguments: hashpartitioning(i_item_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#37, sum#39] +Keys [1]: [i_item_id#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#40] +Results [2]: [i_item_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#40,17,2) AS total_sales#41] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#11, total_sales#17] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [3]: [i_item_id#11, sum#44, isEmpty#45] + +(65) Exchange +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#46] +Results [2]: [i_item_id#11, sum(total_sales#17)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_item_id#11, total_sales#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/simplified.txt new file mode 100644 index 0000000000..63e5d1c484 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_datafusion/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [total_sales,i_item_id] + WholeStageCodegen (20) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_color] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_id,i_color] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..66859a80d7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet spark_catalog.default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(21) Scan parquet spark_catalog.default.item +Output [2]: [i_item_id#12, i_color#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_color, [blanched ,burnished ,slate ])] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#12, i_color#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#12, i_color#13] +Condition : i_color#13 IN (slate ,blanched ,burnished ) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#12] +Input [2]: [i_item_id#12, i_color#13] + +(25) BroadcastExchange +Input [1]: [i_item_id#12] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#11] +Right keys [1]: [i_item_id#12] +Join type: LeftSemi +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#10] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#3, i_item_id#11] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_item_id#11, sum#15] + +(31) Exchange +Input [2]: [i_item_id#11, sum#15] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#11, sum#15] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#21)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(35) Filter [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#23] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#18] +Right keys [1]: [ca_address_sk#23] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#24, i_item_id#25] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#20, i_item_id#25] +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_item_id#25] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] +Keys [1]: [i_item_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_item_id#25, sum#27] + +(46) Exchange +Input [2]: [i_item_id#25, sum#27] +Arguments: hashpartitioning(i_item_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#25, sum#27] +Keys [1]: [i_item_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(48) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] + +(50) Filter [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_bill_addr_sk#31) AND isnotnull(ws_item_sk#30)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#34] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#34] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32] +Input [5]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#35] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#31] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#30, ws_ext_sales_price#32] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ca_address_sk#35] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#36, i_item_id#37] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#30] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#32, i_item_id#37] +Input [4]: [ws_item_sk#30, ws_ext_sales_price#32, i_item_sk#36, i_item_id#37] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#32, i_item_id#37] +Keys [1]: [i_item_id#37] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum#38] +Results [2]: [i_item_id#37, sum#39] + +(61) Exchange +Input [2]: [i_item_id#37, sum#39] +Arguments: hashpartitioning(i_item_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#37, sum#39] +Keys [1]: [i_item_id#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#40] +Results [2]: [i_item_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#40,17,2) AS total_sales#41] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#11, total_sales#17] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [3]: [i_item_id#11, sum#44, isEmpty#45] + +(65) Exchange +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#46] +Results [2]: [i_item_id#11, sum(total_sales#17)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_item_id#11, total_sales#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..63e5d1c484 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.native_iceberg_compat/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [total_sales,i_item_id] + WholeStageCodegen (20) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_color] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_id,i_color] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/explain.txt new file mode 100644 index 0000000000..eef23bdd3c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.call_center (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#7)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(7) BroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#11, cc_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#4] +Right keys [1]: [cc_call_center_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#15] +Results [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS sum_sales#16, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS _w0#17] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#19], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(30) Filter [codegen id : 22] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] +Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.000000)) AND CASE WHEN (avg_monthly_sales#19 > 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#19)) / avg_monthly_sales#19) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] + +(32) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] + +(33) HashAggregate [codegen id : 12] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] +Keys [5]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(cs_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#26))#15] +Results [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, MakeDecimal(sum(UnscaledValue(cs_sales_price#26))#15,17,2) AS sum_sales#16] + +(34) Exchange +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, cc_name#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, cc_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST], false, 0 + +(36) Window +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#20, i_brand#21, cc_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#20, i_brand#21, cc_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#16 AS sum_sales#28, rn#27] +Input [7]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16, rn#27] + +(38) BroadcastExchange +Input [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#20, i_brand#21, cc_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] + +(41) ReusedExchange [Reuses operator id: 34] +Output [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] + +(42) Sort [codegen id : 20] +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, cc_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +(43) Window +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#29, i_brand#30, cc_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#29, i_brand#30, cc_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#16 AS sum_sales#35, rn#34] +Input [7]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] + +(45) BroadcastExchange +Input [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#29, i_brand#30, cc_name#31, (rn#34 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, sum_sales#28 AS psum#36, sum_sales#35 AS nsum#37] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28, i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] + +(48) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7a8378ca00 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_datafusion/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] + WholeStageCodegen (22) + Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cs_item_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..eef23bdd3c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.call_center (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#7)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(7) BroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#11, cc_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#4] +Right keys [1]: [cc_call_center_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#15] +Results [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS sum_sales#16, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS _w0#17] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#19], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(30) Filter [codegen id : 22] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] +Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.000000)) AND CASE WHEN (avg_monthly_sales#19 > 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#19)) / avg_monthly_sales#19) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] + +(32) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] + +(33) HashAggregate [codegen id : 12] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] +Keys [5]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(cs_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#26))#15] +Results [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, MakeDecimal(sum(UnscaledValue(cs_sales_price#26))#15,17,2) AS sum_sales#16] + +(34) Exchange +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, cc_name#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, cc_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST], false, 0 + +(36) Window +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#20, i_brand#21, cc_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#20, i_brand#21, cc_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#16 AS sum_sales#28, rn#27] +Input [7]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16, rn#27] + +(38) BroadcastExchange +Input [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#20, i_brand#21, cc_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] + +(41) ReusedExchange [Reuses operator id: 34] +Output [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] + +(42) Sort [codegen id : 20] +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, cc_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +(43) Window +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#29, i_brand#30, cc_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#29, i_brand#30, cc_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#16 AS sum_sales#35, rn#34] +Input [7]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] + +(45) BroadcastExchange +Input [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#29, i_brand#30, cc_name#31, (rn#34 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, sum_sales#28 AS psum#36, sum_sales#35 AS nsum#37] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28, i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] + +(48) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7a8378ca00 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.native_iceberg_compat/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] + WholeStageCodegen (22) + Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cs_item_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/explain.txt new file mode 100644 index 0000000000..6cefb2c4a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/explain.txt @@ -0,0 +1,373 @@ +== Physical Plan == +TakeOrderedAndProject (59) ++- * Project (58) + +- * BroadcastHashJoin Inner BuildRight (57) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (26) + : : +- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * BroadcastHashJoin LeftSemi BuildRight (18) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet spark_catalog.default.date_dim (13) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet spark_catalog.default.catalog_sales (27) + : : +- ReusedExchange (30) + : +- ReusedExchange (33) + +- BroadcastExchange (56) + +- * Filter (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Filter (45) + : : +- * ColumnarToRow (44) + : : +- Scan parquet spark_catalog.default.web_sales (43) + : +- ReusedExchange (46) + +- ReusedExchange (49) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5] +Input [5]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_date#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#6, d_date#7] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(13) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Filter [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) + +(16) Project [codegen id : 2] +Output [1]: [d_date#8] +Input [2]: [d_date#8, d_week_seq#9] + +(17) BroadcastExchange +Input [1]: [d_date#8] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=2] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#7] +Right keys [1]: [d_date#8] +Join type: LeftSemi +Join condition: None + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_date#7] + +(20) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [ss_ext_sales_price#2, i_item_id#5] +Input [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, d_date_sk#6] + +(23) HashAggregate [codegen id : 4] +Input [2]: [ss_ext_sales_price#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [2]: [i_item_id#5, sum#13] + +(24) Exchange +Input [2]: [i_item_id#5, sum#13] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 15] +Input [2]: [i_item_id#5, sum#13] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [2]: [i_item_id#5 AS item_id#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS ss_item_rev#16] + +(26) Filter [codegen id : 15] +Input [2]: [item_id#15, ss_item_rev#16] +Condition : isnotnull(ss_item_rev#16) + +(27) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#19)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [3]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19] + +(29) Filter [codegen id : 8] +Input [3]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19] +Condition : isnotnull(cs_item_sk#17) + +(30) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#20, i_item_id#21] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [3]: [cs_ext_sales_price#18, cs_sold_date_sk#19, i_item_id#21] +Input [5]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19, i_item_sk#20, i_item_id#21] + +(33) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#22] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#19] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 8] +Output [2]: [cs_ext_sales_price#18, i_item_id#21] +Input [4]: [cs_ext_sales_price#18, cs_sold_date_sk#19, i_item_id#21, d_date_sk#22] + +(36) HashAggregate [codegen id : 8] +Input [2]: [cs_ext_sales_price#18, i_item_id#21] +Keys [1]: [i_item_id#21] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#18))] +Aggregate Attributes [1]: [sum#23] +Results [2]: [i_item_id#21, sum#24] + +(37) Exchange +Input [2]: [i_item_id#21, sum#24] +Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(38) HashAggregate [codegen id : 9] +Input [2]: [i_item_id#21, sum#24] +Keys [1]: [i_item_id#21] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#18))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#18))#25] +Results [2]: [i_item_id#21 AS item_id#26, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#18))#25,17,2) AS cs_item_rev#27] + +(39) Filter [codegen id : 9] +Input [2]: [item_id#26, cs_item_rev#27] +Condition : isnotnull(cs_item_rev#27) + +(40) BroadcastExchange +Input [2]: [item_id#26, cs_item_rev#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#26] +Join type: Inner +Join condition: ((((cast(ss_item_rev#16 as decimal(19,3)) >= (0.9 * cs_item_rev#27)) AND (cast(ss_item_rev#16 as decimal(20,3)) <= (1.1 * cs_item_rev#27))) AND (cast(cs_item_rev#27 as decimal(19,3)) >= (0.9 * ss_item_rev#16))) AND (cast(cs_item_rev#27 as decimal(20,3)) <= (1.1 * ss_item_rev#16))) + +(42) Project [codegen id : 15] +Output [3]: [item_id#15, ss_item_rev#16, cs_item_rev#27] +Input [4]: [item_id#15, ss_item_rev#16, item_id#26, cs_item_rev#27] + +(43) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#30)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 13] +Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] + +(45) Filter [codegen id : 13] +Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Condition : isnotnull(ws_item_sk#28) + +(46) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#31, i_item_id#32] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#31] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 13] +Output [3]: [ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_id#32] +Input [5]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_sk#31, i_item_id#32] + +(49) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#33] + +(50) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#30] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 13] +Output [2]: [ws_ext_sales_price#29, i_item_id#32] +Input [4]: [ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_id#32, d_date_sk#33] + +(52) HashAggregate [codegen id : 13] +Input [2]: [ws_ext_sales_price#29, i_item_id#32] +Keys [1]: [i_item_id#32] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#29))] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#32, sum#35] + +(53) Exchange +Input [2]: [i_item_id#32, sum#35] +Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(54) HashAggregate [codegen id : 14] +Input [2]: [i_item_id#32, sum#35] +Keys [1]: [i_item_id#32] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#29))#36] +Results [2]: [i_item_id#32 AS item_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#29))#36,17,2) AS ws_item_rev#38] + +(55) Filter [codegen id : 14] +Input [2]: [item_id#37, ws_item_rev#38] +Condition : isnotnull(ws_item_rev#38) + +(56) BroadcastExchange +Input [2]: [item_id#37, ws_item_rev#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] + +(57) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#37] +Join type: Inner +Join condition: ((((((((cast(ss_item_rev#16 as decimal(19,3)) >= (0.9 * ws_item_rev#38)) AND (cast(ss_item_rev#16 as decimal(20,3)) <= (1.1 * ws_item_rev#38))) AND (cast(cs_item_rev#27 as decimal(19,3)) >= (0.9 * ws_item_rev#38))) AND (cast(cs_item_rev#27 as decimal(20,3)) <= (1.1 * ws_item_rev#38))) AND (cast(ws_item_rev#38 as decimal(19,3)) >= (0.9 * ss_item_rev#16))) AND (cast(ws_item_rev#38 as decimal(20,3)) <= (1.1 * ss_item_rev#16))) AND (cast(ws_item_rev#38 as decimal(19,3)) >= (0.9 * cs_item_rev#27))) AND (cast(ws_item_rev#38 as decimal(20,3)) <= (1.1 * cs_item_rev#27))) + +(58) Project [codegen id : 15] +Output [8]: [item_id#15, ss_item_rev#16, (((ss_item_rev#16 / ((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38)) / 3) * 100) AS ss_dev#39, cs_item_rev#27, (((cs_item_rev#27 / ((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38)) / 3) * 100) AS cs_dev#40, ws_item_rev#38, (((ws_item_rev#38 / ((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38)) / 3) * 100) AS ws_dev#41, (((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38) / 3) AS average#42] +Input [5]: [item_id#15, ss_item_rev#16, cs_item_rev#27, item_id#37, ws_item_rev#38] + +(59) TakeOrderedAndProject +Input [8]: [item_id#15, ss_item_rev#16, ss_dev#39, cs_item_rev#27, cs_dev#40, ws_item_rev#38, ws_dev#41, average#42] +Arguments: 100, [item_id#15 ASC NULLS FIRST, ss_item_rev#16 ASC NULLS FIRST], [item_id#15, ss_item_rev#16, ss_dev#39, cs_item_rev#27, cs_dev#40, ws_item_rev#38, ws_dev#41, average#42] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 15 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* Project (63) ++- * Filter (62) + +- * ColumnarToRow (61) + +- Scan parquet spark_catalog.default.date_dim (60) + + +(60) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#43, d_week_seq#44] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#43, d_week_seq#44] + +(62) Filter [codegen id : 1] +Input [2]: [d_date#43, d_week_seq#44] +Condition : (isnotnull(d_date#43) AND (d_date#43 = 2000-01-03)) + +(63) Project [codegen id : 1] +Output [1]: [d_week_seq#44] +Input [2]: [d_date#43, d_week_seq#44] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ce068388c7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_datafusion/simplified.txt @@ -0,0 +1,93 @@ +TakeOrderedAndProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + WholeStageCodegen (15) + Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,ws_item_rev,cs_item_rev] + Project [item_id,ss_item_rev,cs_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] + Filter [ss_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_ext_sales_price,ss_sold_date_sk,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date] + Filter [d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Filter [cs_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (8) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (14) + Filter [ws_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (13) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_sales_price,ws_sold_date_sk,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6cefb2c4a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/explain.txt @@ -0,0 +1,373 @@ +== Physical Plan == +TakeOrderedAndProject (59) ++- * Project (58) + +- * BroadcastHashJoin Inner BuildRight (57) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (26) + : : +- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * BroadcastHashJoin LeftSemi BuildRight (18) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet spark_catalog.default.date_dim (13) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet spark_catalog.default.catalog_sales (27) + : : +- ReusedExchange (30) + : +- ReusedExchange (33) + +- BroadcastExchange (56) + +- * Filter (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Filter (45) + : : +- * ColumnarToRow (44) + : : +- Scan parquet spark_catalog.default.web_sales (43) + : +- ReusedExchange (46) + +- ReusedExchange (49) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5] +Input [5]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_date#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#6, d_date#7] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(13) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Filter [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) + +(16) Project [codegen id : 2] +Output [1]: [d_date#8] +Input [2]: [d_date#8, d_week_seq#9] + +(17) BroadcastExchange +Input [1]: [d_date#8] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=2] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#7] +Right keys [1]: [d_date#8] +Join type: LeftSemi +Join condition: None + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_date#7] + +(20) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [ss_ext_sales_price#2, i_item_id#5] +Input [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, d_date_sk#6] + +(23) HashAggregate [codegen id : 4] +Input [2]: [ss_ext_sales_price#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [2]: [i_item_id#5, sum#13] + +(24) Exchange +Input [2]: [i_item_id#5, sum#13] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 15] +Input [2]: [i_item_id#5, sum#13] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [2]: [i_item_id#5 AS item_id#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS ss_item_rev#16] + +(26) Filter [codegen id : 15] +Input [2]: [item_id#15, ss_item_rev#16] +Condition : isnotnull(ss_item_rev#16) + +(27) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#19)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [3]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19] + +(29) Filter [codegen id : 8] +Input [3]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19] +Condition : isnotnull(cs_item_sk#17) + +(30) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#20, i_item_id#21] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 8] +Output [3]: [cs_ext_sales_price#18, cs_sold_date_sk#19, i_item_id#21] +Input [5]: [cs_item_sk#17, cs_ext_sales_price#18, cs_sold_date_sk#19, i_item_sk#20, i_item_id#21] + +(33) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#22] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#19] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 8] +Output [2]: [cs_ext_sales_price#18, i_item_id#21] +Input [4]: [cs_ext_sales_price#18, cs_sold_date_sk#19, i_item_id#21, d_date_sk#22] + +(36) HashAggregate [codegen id : 8] +Input [2]: [cs_ext_sales_price#18, i_item_id#21] +Keys [1]: [i_item_id#21] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#18))] +Aggregate Attributes [1]: [sum#23] +Results [2]: [i_item_id#21, sum#24] + +(37) Exchange +Input [2]: [i_item_id#21, sum#24] +Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(38) HashAggregate [codegen id : 9] +Input [2]: [i_item_id#21, sum#24] +Keys [1]: [i_item_id#21] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#18))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#18))#25] +Results [2]: [i_item_id#21 AS item_id#26, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#18))#25,17,2) AS cs_item_rev#27] + +(39) Filter [codegen id : 9] +Input [2]: [item_id#26, cs_item_rev#27] +Condition : isnotnull(cs_item_rev#27) + +(40) BroadcastExchange +Input [2]: [item_id#26, cs_item_rev#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#26] +Join type: Inner +Join condition: ((((cast(ss_item_rev#16 as decimal(19,3)) >= (0.9 * cs_item_rev#27)) AND (cast(ss_item_rev#16 as decimal(20,3)) <= (1.1 * cs_item_rev#27))) AND (cast(cs_item_rev#27 as decimal(19,3)) >= (0.9 * ss_item_rev#16))) AND (cast(cs_item_rev#27 as decimal(20,3)) <= (1.1 * ss_item_rev#16))) + +(42) Project [codegen id : 15] +Output [3]: [item_id#15, ss_item_rev#16, cs_item_rev#27] +Input [4]: [item_id#15, ss_item_rev#16, item_id#26, cs_item_rev#27] + +(43) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#30)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 13] +Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] + +(45) Filter [codegen id : 13] +Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Condition : isnotnull(ws_item_sk#28) + +(46) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#31, i_item_id#32] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#31] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 13] +Output [3]: [ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_id#32] +Input [5]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_sk#31, i_item_id#32] + +(49) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#33] + +(50) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#30] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 13] +Output [2]: [ws_ext_sales_price#29, i_item_id#32] +Input [4]: [ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_id#32, d_date_sk#33] + +(52) HashAggregate [codegen id : 13] +Input [2]: [ws_ext_sales_price#29, i_item_id#32] +Keys [1]: [i_item_id#32] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#29))] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#32, sum#35] + +(53) Exchange +Input [2]: [i_item_id#32, sum#35] +Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(54) HashAggregate [codegen id : 14] +Input [2]: [i_item_id#32, sum#35] +Keys [1]: [i_item_id#32] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#29))#36] +Results [2]: [i_item_id#32 AS item_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#29))#36,17,2) AS ws_item_rev#38] + +(55) Filter [codegen id : 14] +Input [2]: [item_id#37, ws_item_rev#38] +Condition : isnotnull(ws_item_rev#38) + +(56) BroadcastExchange +Input [2]: [item_id#37, ws_item_rev#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] + +(57) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#37] +Join type: Inner +Join condition: ((((((((cast(ss_item_rev#16 as decimal(19,3)) >= (0.9 * ws_item_rev#38)) AND (cast(ss_item_rev#16 as decimal(20,3)) <= (1.1 * ws_item_rev#38))) AND (cast(cs_item_rev#27 as decimal(19,3)) >= (0.9 * ws_item_rev#38))) AND (cast(cs_item_rev#27 as decimal(20,3)) <= (1.1 * ws_item_rev#38))) AND (cast(ws_item_rev#38 as decimal(19,3)) >= (0.9 * ss_item_rev#16))) AND (cast(ws_item_rev#38 as decimal(20,3)) <= (1.1 * ss_item_rev#16))) AND (cast(ws_item_rev#38 as decimal(19,3)) >= (0.9 * cs_item_rev#27))) AND (cast(ws_item_rev#38 as decimal(20,3)) <= (1.1 * cs_item_rev#27))) + +(58) Project [codegen id : 15] +Output [8]: [item_id#15, ss_item_rev#16, (((ss_item_rev#16 / ((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38)) / 3) * 100) AS ss_dev#39, cs_item_rev#27, (((cs_item_rev#27 / ((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38)) / 3) * 100) AS cs_dev#40, ws_item_rev#38, (((ws_item_rev#38 / ((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38)) / 3) * 100) AS ws_dev#41, (((ss_item_rev#16 + cs_item_rev#27) + ws_item_rev#38) / 3) AS average#42] +Input [5]: [item_id#15, ss_item_rev#16, cs_item_rev#27, item_id#37, ws_item_rev#38] + +(59) TakeOrderedAndProject +Input [8]: [item_id#15, ss_item_rev#16, ss_dev#39, cs_item_rev#27, cs_dev#40, ws_item_rev#38, ws_dev#41, average#42] +Arguments: 100, [item_id#15 ASC NULLS FIRST, ss_item_rev#16 ASC NULLS FIRST], [item_id#15, ss_item_rev#16, ss_dev#39, cs_item_rev#27, cs_dev#40, ws_item_rev#38, ws_dev#41, average#42] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 15 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* Project (63) ++- * Filter (62) + +- * ColumnarToRow (61) + +- Scan parquet spark_catalog.default.date_dim (60) + + +(60) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#43, d_week_seq#44] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#43, d_week_seq#44] + +(62) Filter [codegen id : 1] +Input [2]: [d_date#43, d_week_seq#44] +Condition : (isnotnull(d_date#43) AND (d_date#43 = 2000-01-03)) + +(63) Project [codegen id : 1] +Output [1]: [d_week_seq#44] +Input [2]: [d_date#43, d_week_seq#44] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ce068388c7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.native_iceberg_compat/simplified.txt @@ -0,0 +1,93 @@ +TakeOrderedAndProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + WholeStageCodegen (15) + Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,ws_item_rev,cs_item_rev] + Project [item_id,ss_item_rev,cs_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] + Filter [ss_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_ext_sales_price,ss_sold_date_sk,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date] + Filter [d_week_seq] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Filter [cs_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (8) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (14) + Filter [ws_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (13) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_sales_price,ws_sold_date_sk,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/explain.txt new file mode 100644 index 0000000000..ee3a0870f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet spark_catalog.default.store (13) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet spark_catalog.default.date_dim (19) + +- BroadcastExchange (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * HashAggregate (27) + : : +- ReusedExchange (26) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.store (28) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.date_dim (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] + +(13) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(17) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#35] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 10] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] + +(19) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_month_seq#38, d_week_seq#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#38, d_week_seq#39] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1212)) AND (d_month_seq#38 <= 1223)) AND isnotnull(d_week_seq#39)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#39] +Input [2]: [d_month_seq#38, d_week_seq#39] + +(23) BroadcastExchange +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#39] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] + +(26) ReusedExchange [Reuses operator id: 11] +Output [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] + +(27) HashAggregate [codegen id : 9] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] + +(28) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#57, s_store_id#58] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#57, s_store_id#58] + +(30) Filter [codegen id : 7] +Input [2]: [s_store_sk#57, s_store_id#58] +Condition : (isnotnull(s_store_sk#57) AND isnotnull(s_store_id#58)) + +(31) BroadcastExchange +Input [2]: [s_store_sk#57, s_store_id#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#57] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [9]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58] +Input [11]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#57, s_store_id#58] + +(34) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_month_seq#59, d_week_seq#60] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#59, d_week_seq#60] + +(36) Filter [codegen id : 8] +Input [2]: [d_month_seq#59, d_week_seq#60] +Condition : (((isnotnull(d_month_seq#59) AND (d_month_seq#59 >= 1224)) AND (d_month_seq#59 <= 1235)) AND isnotnull(d_week_seq#60)) + +(37) Project [codegen id : 8] +Output [1]: [d_week_seq#60] +Input [2]: [d_month_seq#59, d_week_seq#60] + +(38) BroadcastExchange +Input [1]: [d_week_seq#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#60] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [9]: [d_week_seq#5 AS d_week_seq2#61, s_store_id#58 AS s_store_id2#62, sun_sales#28 AS sun_sales2#63, mon_sales#29 AS mon_sales2#64, tue_sales#30 AS tue_sales2#65, wed_sales#31 AS wed_sales2#66, thu_sales#32 AS thu_sales2#67, fri_sales#33 AS fri_sales2#68, sat_sales#34 AS sat_sales2#69] +Input [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58, d_week_seq#60] + +(41) BroadcastExchange +Input [9]: [d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=7] + +(42) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#62, (d_week_seq2#61 - 52)] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 10] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1#43 / sun_sales2#63) AS (sun_sales1 / sun_sales2)#70, (mon_sales1#44 / mon_sales2#64) AS (mon_sales1 / mon_sales2)#71, (tue_sales1#45 / tue_sales2#65) AS (tue_sales1 / tue_sales2)#72, (wed_sales1#46 / wed_sales2#66) AS (wed_sales1 / wed_sales2)#73, (thu_sales1#47 / thu_sales2#67) AS (thu_sales1 / thu_sales2)#74, (fri_sales1#48 / fri_sales2#68) AS (fri_sales1 / fri_sales2)#75, (sat_sales1#49 / sat_sales2#69) AS (sat_sales1 / sat_sales2)#76] +Input [19]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] + +(44) TakeOrderedAndProject +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/simplified.txt new file mode 100644 index 0000000000..77b1718af1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_datafusion/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + WholeStageCodegen (10) + Project [s_store_name1,s_store_id1,d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] + Project [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [d_week_seq,ss_store_sk,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Project [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [s_store_sk,s_store_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ee3a0870f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet spark_catalog.default.store (13) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet spark_catalog.default.date_dim (19) + +- BroadcastExchange (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * HashAggregate (27) + : : +- ReusedExchange (26) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.store (28) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.date_dim (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] + +(13) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(17) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#35] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 10] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] + +(19) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_month_seq#38, d_week_seq#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#38, d_week_seq#39] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1212)) AND (d_month_seq#38 <= 1223)) AND isnotnull(d_week_seq#39)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#39] +Input [2]: [d_month_seq#38, d_week_seq#39] + +(23) BroadcastExchange +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#39] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] + +(26) ReusedExchange [Reuses operator id: 11] +Output [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] + +(27) HashAggregate [codegen id : 9] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] +Keys [2]: [d_week_seq#5, ss_store_sk#1] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] + +(28) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#57, s_store_id#58] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#57, s_store_id#58] + +(30) Filter [codegen id : 7] +Input [2]: [s_store_sk#57, s_store_id#58] +Condition : (isnotnull(s_store_sk#57) AND isnotnull(s_store_id#58)) + +(31) BroadcastExchange +Input [2]: [s_store_sk#57, s_store_id#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#57] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [9]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58] +Input [11]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#57, s_store_id#58] + +(34) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_month_seq#59, d_week_seq#60] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#59, d_week_seq#60] + +(36) Filter [codegen id : 8] +Input [2]: [d_month_seq#59, d_week_seq#60] +Condition : (((isnotnull(d_month_seq#59) AND (d_month_seq#59 >= 1224)) AND (d_month_seq#59 <= 1235)) AND isnotnull(d_week_seq#60)) + +(37) Project [codegen id : 8] +Output [1]: [d_week_seq#60] +Input [2]: [d_month_seq#59, d_week_seq#60] + +(38) BroadcastExchange +Input [1]: [d_week_seq#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#60] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [9]: [d_week_seq#5 AS d_week_seq2#61, s_store_id#58 AS s_store_id2#62, sun_sales#28 AS sun_sales2#63, mon_sales#29 AS mon_sales2#64, tue_sales#30 AS tue_sales2#65, wed_sales#31 AS wed_sales2#66, thu_sales#32 AS thu_sales2#67, fri_sales#33 AS fri_sales2#68, sat_sales#34 AS sat_sales2#69] +Input [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58, d_week_seq#60] + +(41) BroadcastExchange +Input [9]: [d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=7] + +(42) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#62, (d_week_seq2#61 - 52)] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 10] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1#43 / sun_sales2#63) AS (sun_sales1 / sun_sales2)#70, (mon_sales1#44 / mon_sales2#64) AS (mon_sales1 / mon_sales2)#71, (tue_sales1#45 / tue_sales2#65) AS (tue_sales1 / tue_sales2)#72, (wed_sales1#46 / wed_sales2#66) AS (wed_sales1 / wed_sales2)#73, (thu_sales1#47 / thu_sales2#67) AS (thu_sales1 / thu_sales2)#74, (fri_sales1#48 / fri_sales2#68) AS (fri_sales1 / fri_sales2)#75, (sat_sales1#49 / sat_sales2#69) AS (sat_sales1 / sat_sales2)#76] +Input [19]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] + +(44) TakeOrderedAndProject +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..77b1718af1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.native_iceberg_compat/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + WholeStageCodegen (10) + Project [s_store_name1,s_store_id1,d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] + Project [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen (2) + HashAggregate [d_week_seq,ss_store_sk,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + Project [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [s_store_sk,s_store_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/explain.txt new file mode 100644 index 0000000000..bd5ce97614 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- BroadcastExchange (33) + +- * Filter (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet spark_catalog.default.item (26) + + +(1) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(12) Filter [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(13) BroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_month_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(20) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#5] +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] + +(23) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_category), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(26) Scan parquet spark_catalog.default.item +Output [2]: [i_current_price#15, i_category#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [i_category#16, sum#19, count#20] + +(30) Exchange +Input [3]: [i_category#16, sum#19, count#20] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#16, sum#19, count#20] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#15))#21] +Results [2]: [cast((avg(UnscaledValue(i_current_price#15))#21 / 100.0) as decimal(11,6)) AS avg(i_current_price)#22, i_category#16] + +(32) Filter [codegen id : 5] +Input [2]: [avg(i_current_price)#22, i_category#16] +Condition : isnotnull(avg(i_current_price)#22) + +(33) BroadcastExchange +Input [2]: [avg(i_current_price)#22, i_category#16] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#14] +Right keys [1]: [i_category#16] +Join type: Inner +Join condition: (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#22)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#12] +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#22, i_category#16] + +(36) BroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#12] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [2]: [ca_state#2, count#24] + +(40) Exchange +Input [2]: [ca_state#2, count#24] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(41) HashAggregate [codegen id : 8] +Input [2]: [ca_state#2, count#24] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [2]: [ca_state#2 AS state#26, count(1)#25 AS cnt#27] + +(42) Filter [codegen id : 8] +Input [2]: [state#26, cnt#27] +Condition : (cnt#27 >= 10) + +(43) TakeOrderedAndProject +Input [2]: [state#26, cnt#27] +Arguments: 100, [cnt#27 ASC NULLS FIRST], [state#26, cnt#27] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* HashAggregate (50) ++- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet spark_catalog.default.date_dim (44) + + +(44) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#28, d_year#29, d_moy#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(46) Filter [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] +Condition : (((isnotnull(d_year#29) AND isnotnull(d_moy#30)) AND (d_year#29 = 2000)) AND (d_moy#30 = 1)) + +(47) Project [codegen id : 1] +Output [1]: [d_month_seq#28] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(48) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + +(49) Exchange +Input [1]: [d_month_seq#28] +Arguments: hashpartitioning(d_month_seq#28, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/simplified.txt new file mode 100644 index 0000000000..53f3d8dd0b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_datafusion/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [cnt,state] + WholeStageCodegen (8) + Filter [cnt] + HashAggregate [ca_state,count] [count(1),state,cnt,count] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] + Project [ca_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + BroadcastHashJoin [i_category,i_category,i_current_price,avg(i_current_price)] + Filter [i_current_price,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [avg(i_current_price)] + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),sum,count] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..bd5ce97614 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- BroadcastExchange (33) + +- * Filter (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet spark_catalog.default.item (26) + + +(1) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(12) Filter [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(13) BroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_month_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(20) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#5] +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] + +(23) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_category), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(26) Scan parquet spark_catalog.default.item +Output [2]: [i_current_price#15, i_category#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [i_category#16, sum#19, count#20] + +(30) Exchange +Input [3]: [i_category#16, sum#19, count#20] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#16, sum#19, count#20] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#15))#21] +Results [2]: [cast((avg(UnscaledValue(i_current_price#15))#21 / 100.0) as decimal(11,6)) AS avg(i_current_price)#22, i_category#16] + +(32) Filter [codegen id : 5] +Input [2]: [avg(i_current_price)#22, i_category#16] +Condition : isnotnull(avg(i_current_price)#22) + +(33) BroadcastExchange +Input [2]: [avg(i_current_price)#22, i_category#16] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#14] +Right keys [1]: [i_category#16] +Join type: Inner +Join condition: (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#22)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#12] +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#22, i_category#16] + +(36) BroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#12] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [2]: [ca_state#2, count#24] + +(40) Exchange +Input [2]: [ca_state#2, count#24] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(41) HashAggregate [codegen id : 8] +Input [2]: [ca_state#2, count#24] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [2]: [ca_state#2 AS state#26, count(1)#25 AS cnt#27] + +(42) Filter [codegen id : 8] +Input [2]: [state#26, cnt#27] +Condition : (cnt#27 >= 10) + +(43) TakeOrderedAndProject +Input [2]: [state#26, cnt#27] +Arguments: 100, [cnt#27 ASC NULLS FIRST], [state#26, cnt#27] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* HashAggregate (50) ++- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet spark_catalog.default.date_dim (44) + + +(44) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#28, d_year#29, d_moy#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(46) Filter [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] +Condition : (((isnotnull(d_year#29) AND isnotnull(d_moy#30)) AND (d_year#29 = 2000)) AND (d_moy#30 = 1)) + +(47) Project [codegen id : 1] +Output [1]: [d_month_seq#28] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(48) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + +(49) Exchange +Input [1]: [d_month_seq#28] +Arguments: hashpartitioning(d_month_seq#28, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..53f3d8dd0b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.native_iceberg_compat/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [cnt,state] + WholeStageCodegen (8) + Filter [cnt] + HashAggregate [ca_state,count] [count(1),state,cnt,count] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] + Project [ca_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + BroadcastHashJoin [i_category,i_category,i_current_price,avg(i_current_price)] + Filter [i_current_price,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [avg(i_current_price)] + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),sum,count] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/explain.txt new file mode 100644 index 0000000000..abc1098de1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet spark_catalog.default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(21) Scan parquet spark_catalog.default.item +Output [2]: [i_item_id#12, i_category#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music )] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#12, i_category#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#12, i_category#13] +Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#12] +Input [2]: [i_item_id#12, i_category#13] + +(25) BroadcastExchange +Input [1]: [i_item_id#12] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#11] +Right keys [1]: [i_item_id#12] +Join type: LeftSemi +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#10] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#3, i_item_id#11] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_item_id#11, sum#15] + +(31) Exchange +Input [2]: [i_item_id#11, sum#15] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#11, sum#15] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#21)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(35) Filter [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#23] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#18] +Right keys [1]: [ca_address_sk#23] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#24, i_item_id#25] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#20, i_item_id#25] +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_item_id#25] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] +Keys [1]: [i_item_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_item_id#25, sum#27] + +(46) Exchange +Input [2]: [i_item_id#25, sum#27] +Arguments: hashpartitioning(i_item_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#25, sum#27] +Keys [1]: [i_item_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(48) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] + +(50) Filter [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_bill_addr_sk#31) AND isnotnull(ws_item_sk#30)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#34] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#34] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32] +Input [5]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#35] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#31] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#30, ws_ext_sales_price#32] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ca_address_sk#35] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#36, i_item_id#37] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#30] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#32, i_item_id#37] +Input [4]: [ws_item_sk#30, ws_ext_sales_price#32, i_item_sk#36, i_item_id#37] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#32, i_item_id#37] +Keys [1]: [i_item_id#37] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum#38] +Results [2]: [i_item_id#37, sum#39] + +(61) Exchange +Input [2]: [i_item_id#37, sum#39] +Arguments: hashpartitioning(i_item_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#37, sum#39] +Keys [1]: [i_item_id#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#40] +Results [2]: [i_item_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#40,17,2) AS total_sales#41] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#11, total_sales#17] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [3]: [i_item_id#11, sum#44, isEmpty#45] + +(65) Exchange +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#46] +Results [2]: [i_item_id#11, sum(total_sales#17)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#47] +Arguments: 100, [i_item_id#11 ASC NULLS FIRST, total_sales#47 ASC NULLS FIRST], [i_item_id#11, total_sales#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/simplified.txt new file mode 100644 index 0000000000..91b533a488 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_datafusion/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_id,i_category] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..abc1098de1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/explain.txt @@ -0,0 +1,391 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet spark_catalog.default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnotnull(ss_addr_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#8, ca_gmt_offset#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] +Condition : ((isnotnull(ca_gmt_offset#9) AND (ca_gmt_offset#9 = -5.00)) AND isnotnull(ca_address_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_gmt_offset#9] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] +Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#8] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#10, i_item_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#10, i_item_id#11] +Condition : isnotnull(i_item_sk#10) + +(21) Scan parquet spark_catalog.default.item +Output [2]: [i_item_id#12, i_category#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music )] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#12, i_category#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#12, i_category#13] +Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#12] +Input [2]: [i_item_id#12, i_category#13] + +(25) BroadcastExchange +Input [1]: [i_item_id#12] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#11] +Right keys [1]: [i_item_id#12] +Join type: LeftSemi +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#10, i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#10] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#3, i_item_id#11] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#3, i_item_id#11] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [2]: [i_item_id#11, sum#15] + +(31) Exchange +Input [2]: [i_item_id#11, sum#15] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#11, sum#15] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] +Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS total_sales#17] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#21)] +PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] + +(35) Filter [codegen id : 11] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21] +Condition : (isnotnull(cs_bill_addr_sk#18) AND isnotnull(cs_item_sk#19)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#22] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Input [5]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, cs_sold_date_sk#21, d_date_sk#22] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#23] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#18] +Right keys [1]: [ca_address_sk#23] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#19, cs_ext_sales_price#20] +Input [4]: [cs_bill_addr_sk#18, cs_item_sk#19, cs_ext_sales_price#20, ca_address_sk#23] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#24, i_item_id#25] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#19] +Right keys [1]: [i_item_sk#24] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#20, i_item_id#25] +Input [4]: [cs_item_sk#19, cs_ext_sales_price#20, i_item_sk#24, i_item_id#25] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#20, i_item_id#25] +Keys [1]: [i_item_id#25] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum#26] +Results [2]: [i_item_id#25, sum#27] + +(46) Exchange +Input [2]: [i_item_id#25, sum#27] +Arguments: hashpartitioning(i_item_id#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#25, sum#27] +Keys [1]: [i_item_id#25] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#20))#28] +Results [2]: [i_item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#20))#28,17,2) AS total_sales#29] + +(48) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] + +(50) Filter [codegen id : 17] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_bill_addr_sk#31) AND isnotnull(ws_item_sk#30)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#34] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#34] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32] +Input [5]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ws_sold_date_sk#33, d_date_sk#34] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#35] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#31] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#30, ws_ext_sales_price#32] +Input [4]: [ws_item_sk#30, ws_bill_addr_sk#31, ws_ext_sales_price#32, ca_address_sk#35] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#36, i_item_id#37] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#30] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#32, i_item_id#37] +Input [4]: [ws_item_sk#30, ws_ext_sales_price#32, i_item_sk#36, i_item_id#37] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#32, i_item_id#37] +Keys [1]: [i_item_id#37] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum#38] +Results [2]: [i_item_id#37, sum#39] + +(61) Exchange +Input [2]: [i_item_id#37, sum#39] +Arguments: hashpartitioning(i_item_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#37, sum#39] +Keys [1]: [i_item_id#37] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#32))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#32))#40] +Results [2]: [i_item_id#37, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#32))#40,17,2) AS total_sales#41] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#11, total_sales#17] +Keys [1]: [i_item_id#11] +Functions [1]: [partial_sum(total_sales#17)] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [3]: [i_item_id#11, sum#44, isEmpty#45] + +(65) Exchange +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#11, sum#44, isEmpty#45] +Keys [1]: [i_item_id#11] +Functions [1]: [sum(total_sales#17)] +Aggregate Attributes [1]: [sum(total_sales#17)#46] +Results [2]: [i_item_id#11, sum(total_sales#17)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#11, total_sales#47] +Arguments: 100, [i_item_id#11 ASC NULLS FIRST, total_sales#47 ASC NULLS FIRST], [i_item_id#11, total_sales#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..91b533a488 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.native_iceberg_compat/simplified.txt @@ -0,0 +1,101 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen (20) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_item_id,i_item_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_item_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_id,i_category] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (11) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/explain.txt new file mode 100644 index 0000000000..feacd1950b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/explain.txt @@ -0,0 +1,405 @@ +== Physical Plan == +* Project (71) ++- * BroadcastNestedLoopJoin Inner BuildRight (70) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (30) + : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : :- * Project (24) + : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet spark_catalog.default.promotion (11) + : : : : +- BroadcastExchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet spark_catalog.default.date_dim (18) + : : : +- BroadcastExchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet spark_catalog.default.customer (25) + : : +- BroadcastExchange (35) + : : +- * Project (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet spark_catalog.default.customer_address (31) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet spark_catalog.default.item (38) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Project (56) + : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : :- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet spark_catalog.default.store_sales (48) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (54) + : : +- ReusedExchange (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 7] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_store_sk#3) AND isnotnull(ss_promo_sk#4)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(6) Filter [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [s_store_sk#7] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(8) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 7] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, s_store_sk#7] + +(11) Scan parquet spark_catalog.default.promotion +Output [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] + +(13) Filter [codegen id : 2] +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Condition : ((((p_channel_dmail#10 = Y) OR (p_channel_email#11 = Y)) OR (p_channel_tv#12 = Y)) AND isnotnull(p_promo_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#9] +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 7] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#9] + +(18) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] + +(20) Filter [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((((isnotnull(d_year#14) AND isnotnull(d_moy#15)) AND (d_year#14 = 1998)) AND (d_moy#15 = 11)) AND isnotnull(d_date_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] + +(22) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 7] +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#13] + +(25) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] + +(27) Filter [codegen id : 4] +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_current_addr_sk#17)) + +(28) BroadcastExchange +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#16] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 7] +Output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#16, c_current_addr_sk#17] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#18, ca_gmt_offset#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] + +(33) Filter [codegen id : 5] +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] +Condition : ((isnotnull(ca_gmt_offset#19) AND (ca_gmt_offset#19 = -5.00)) AND isnotnull(ca_address_sk#18)) + +(34) Project [codegen id : 5] +Output [1]: [ca_address_sk#18] +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] + +(35) BroadcastExchange +Input [1]: [ca_address_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#18] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 7] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#5] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17, ca_address_sk#18] + +(38) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#20, i_category#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#20, i_category#21] + +(40) Filter [codegen id : 6] +Input [2]: [i_item_sk#20, i_category#21] +Condition : ((isnotnull(i_category#21) AND (i_category#21 = Jewelry )) AND isnotnull(i_item_sk#20)) + +(41) Project [codegen id : 6] +Output [1]: [i_item_sk#20] +Input [2]: [i_item_sk#20, i_category#21] + +(42) BroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 7] +Output [1]: [ss_ext_sales_price#5] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#20] + +(45) HashAggregate [codegen id : 7] +Input [1]: [ss_ext_sales_price#5] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#22] +Results [1]: [sum#23] + +(46) Exchange +Input [1]: [sum#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(47) HashAggregate [codegen id : 15] +Input [1]: [sum#23] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#24] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#24,17,2) AS promotions#25] + +(48) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#30)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 13] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30] + +(50) Filter [codegen id : 13] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30] +Condition : ((isnotnull(ss_store_sk#28) AND isnotnull(ss_customer_sk#27)) AND isnotnull(ss_item_sk#26)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [s_store_sk#31] + +(52) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_store_sk#28] +Right keys [1]: [s_store_sk#31] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 13] +Output [4]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29, ss_sold_date_sk#30] +Input [6]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30, s_store_sk#31] + +(54) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#32] + +(55) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#30] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 13] +Output [3]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29, ss_sold_date_sk#30, d_date_sk#32] + +(57) ReusedExchange [Reuses operator id: 28] +Output [2]: [c_customer_sk#33, c_current_addr_sk#34] + +(58) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_customer_sk#27] +Right keys [1]: [c_customer_sk#33] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 13] +Output [3]: [ss_item_sk#26, ss_ext_sales_price#29, c_current_addr_sk#34] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29, c_customer_sk#33, c_current_addr_sk#34] + +(60) ReusedExchange [Reuses operator id: 35] +Output [1]: [ca_address_sk#35] + +(61) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 13] +Output [2]: [ss_item_sk#26, ss_ext_sales_price#29] +Input [4]: [ss_item_sk#26, ss_ext_sales_price#29, c_current_addr_sk#34, ca_address_sk#35] + +(63) ReusedExchange [Reuses operator id: 42] +Output [1]: [i_item_sk#36] + +(64) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_item_sk#26] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 13] +Output [1]: [ss_ext_sales_price#29] +Input [3]: [ss_item_sk#26, ss_ext_sales_price#29, i_item_sk#36] + +(66) HashAggregate [codegen id : 13] +Input [1]: [ss_ext_sales_price#29] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#29))] +Aggregate Attributes [1]: [sum#37] +Results [1]: [sum#38] + +(67) Exchange +Input [1]: [sum#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(68) HashAggregate [codegen id : 14] +Input [1]: [sum#38] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#29))#39] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#29))#39,17,2) AS total#40] + +(69) BroadcastExchange +Input [1]: [total#40] +Arguments: IdentityBroadcastMode, [plan_id=9] + +(70) BroadcastNestedLoopJoin [codegen id : 15] +Join type: Inner +Join condition: None + +(71) Project [codegen id : 15] +Output [3]: [promotions#25, total#40, ((cast(promotions#25 as decimal(15,4)) / cast(total#40 as decimal(15,4))) * 100) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#41] +Input [2]: [promotions#25, total#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/simplified.txt new file mode 100644 index 0000000000..86604ede26 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_datafusion/simplified.txt @@ -0,0 +1,103 @@ +WholeStageCodegen (15) + Project [promotions,total] + BroadcastNestedLoopJoin + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_gmt_offset] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [s_store_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [c_customer_sk,c_current_addr_sk] #5 + InputAdapter + ReusedExchange [ca_address_sk] #6 + InputAdapter + ReusedExchange [i_item_sk] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..feacd1950b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/explain.txt @@ -0,0 +1,405 @@ +== Physical Plan == +* Project (71) ++- * BroadcastNestedLoopJoin Inner BuildRight (70) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (30) + : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : :- * Project (24) + : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.store (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet spark_catalog.default.promotion (11) + : : : : +- BroadcastExchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet spark_catalog.default.date_dim (18) + : : : +- BroadcastExchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet spark_catalog.default.customer (25) + : : +- BroadcastExchange (35) + : : +- * Project (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet spark_catalog.default.customer_address (31) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet spark_catalog.default.item (38) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Project (56) + : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : :- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet spark_catalog.default.store_sales (48) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (54) + : : +- ReusedExchange (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 7] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_store_sk#3) AND isnotnull(ss_promo_sk#4)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(6) Filter [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [s_store_sk#7] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(8) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 7] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, s_store_sk#7] + +(11) Scan parquet spark_catalog.default.promotion +Output [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] + +(13) Filter [codegen id : 2] +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] +Condition : ((((p_channel_dmail#10 = Y) OR (p_channel_email#11 = Y)) OR (p_channel_tv#12 = Y)) AND isnotnull(p_promo_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#9] +Input [4]: [p_promo_sk#9, p_channel_dmail#10, p_channel_email#11, p_channel_tv#12] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 7] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#9] + +(18) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] + +(20) Filter [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((((isnotnull(d_year#14) AND isnotnull(d_moy#15)) AND (d_year#14 = 1998)) AND (d_moy#15 = 11)) AND isnotnull(d_date_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] + +(22) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 7] +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#13] + +(25) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#16, c_current_addr_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] + +(27) Filter [codegen id : 4] +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_current_addr_sk#17)) + +(28) BroadcastExchange +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#16] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 7] +Output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#16, c_current_addr_sk#17] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#18, ca_gmt_offset#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] + +(33) Filter [codegen id : 5] +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] +Condition : ((isnotnull(ca_gmt_offset#19) AND (ca_gmt_offset#19 = -5.00)) AND isnotnull(ca_address_sk#18)) + +(34) Project [codegen id : 5] +Output [1]: [ca_address_sk#18] +Input [2]: [ca_address_sk#18, ca_gmt_offset#19] + +(35) BroadcastExchange +Input [1]: [ca_address_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#18] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 7] +Output [2]: [ss_item_sk#1, ss_ext_sales_price#5] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#17, ca_address_sk#18] + +(38) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#20, i_category#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#20, i_category#21] + +(40) Filter [codegen id : 6] +Input [2]: [i_item_sk#20, i_category#21] +Condition : ((isnotnull(i_category#21) AND (i_category#21 = Jewelry )) AND isnotnull(i_item_sk#20)) + +(41) Project [codegen id : 6] +Output [1]: [i_item_sk#20] +Input [2]: [i_item_sk#20, i_category#21] + +(42) BroadcastExchange +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(43) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 7] +Output [1]: [ss_ext_sales_price#5] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#20] + +(45) HashAggregate [codegen id : 7] +Input [1]: [ss_ext_sales_price#5] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#22] +Results [1]: [sum#23] + +(46) Exchange +Input [1]: [sum#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(47) HashAggregate [codegen id : 15] +Input [1]: [sum#23] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#24] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#24,17,2) AS promotions#25] + +(48) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#30)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 13] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30] + +(50) Filter [codegen id : 13] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30] +Condition : ((isnotnull(ss_store_sk#28) AND isnotnull(ss_customer_sk#27)) AND isnotnull(ss_item_sk#26)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [s_store_sk#31] + +(52) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_store_sk#28] +Right keys [1]: [s_store_sk#31] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 13] +Output [4]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29, ss_sold_date_sk#30] +Input [6]: [ss_item_sk#26, ss_customer_sk#27, ss_store_sk#28, ss_ext_sales_price#29, ss_sold_date_sk#30, s_store_sk#31] + +(54) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#32] + +(55) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#30] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(56) Project [codegen id : 13] +Output [3]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29, ss_sold_date_sk#30, d_date_sk#32] + +(57) ReusedExchange [Reuses operator id: 28] +Output [2]: [c_customer_sk#33, c_current_addr_sk#34] + +(58) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_customer_sk#27] +Right keys [1]: [c_customer_sk#33] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 13] +Output [3]: [ss_item_sk#26, ss_ext_sales_price#29, c_current_addr_sk#34] +Input [5]: [ss_item_sk#26, ss_customer_sk#27, ss_ext_sales_price#29, c_customer_sk#33, c_current_addr_sk#34] + +(60) ReusedExchange [Reuses operator id: 35] +Output [1]: [ca_address_sk#35] + +(61) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#35] +Join type: Inner +Join condition: None + +(62) Project [codegen id : 13] +Output [2]: [ss_item_sk#26, ss_ext_sales_price#29] +Input [4]: [ss_item_sk#26, ss_ext_sales_price#29, c_current_addr_sk#34, ca_address_sk#35] + +(63) ReusedExchange [Reuses operator id: 42] +Output [1]: [i_item_sk#36] + +(64) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_item_sk#26] +Right keys [1]: [i_item_sk#36] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 13] +Output [1]: [ss_ext_sales_price#29] +Input [3]: [ss_item_sk#26, ss_ext_sales_price#29, i_item_sk#36] + +(66) HashAggregate [codegen id : 13] +Input [1]: [ss_ext_sales_price#29] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#29))] +Aggregate Attributes [1]: [sum#37] +Results [1]: [sum#38] + +(67) Exchange +Input [1]: [sum#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(68) HashAggregate [codegen id : 14] +Input [1]: [sum#38] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#29))#39] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#29))#39,17,2) AS total#40] + +(69) BroadcastExchange +Input [1]: [total#40] +Arguments: IdentityBroadcastMode, [plan_id=9] + +(70) BroadcastNestedLoopJoin [codegen id : 15] +Join type: Inner +Join condition: None + +(71) Project [codegen id : 15] +Output [3]: [promotions#25, total#40, ((cast(promotions#25 as decimal(15,4)) / cast(total#40 as decimal(15,4))) * 100) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#41] +Input [2]: [promotions#25, total#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..86604ede26 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.native_iceberg_compat/simplified.txt @@ -0,0 +1,103 @@ +WholeStageCodegen (15) + Project [promotions,total] + BroadcastNestedLoopJoin + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_promo_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_gmt_offset] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [s_store_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [c_customer_sk,c_current_addr_sk] #5 + InputAdapter + ReusedExchange [ca_address_sk] #6 + InputAdapter + ReusedExchange [i_item_sk] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/explain.txt new file mode 100644 index 0000000000..59701e2d06 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/explain.txt @@ -0,0 +1,187 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.web_site (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet spark_catalog.default.date_dim (22) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_ship_mode_sk#3)) AND isnotnull(ws_web_site_sk#2)) AND isnotnull(ws_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#4] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_type#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] + +(16) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#10, web_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [web_site_sk#10, web_name#11] + +(18) Filter [codegen id : 3] +Input [2]: [web_site_sk#10, web_name#11] +Condition : isnotnull(web_site_sk#10) + +(19) BroadcastExchange +Input [2]: [web_site_sk#10, web_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_web_site_sk#2] +Right keys [1]: [web_site_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_site_sk#10, web_name#11] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_month_seq#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#12] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(26) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] +Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11, d_date_sk#12] + +(29) HashAggregate [codegen id : 5] +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] + +(30) Exchange +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, web_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 6] +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, web_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/simplified.txt new file mode 100644 index 0000000000..197db771f3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [_groupingexpression,sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [_groupingexpression,sm_type,web_name] #1 + WholeStageCodegen (5) + HashAggregate [_groupingexpression,sm_type,web_name,ws_ship_date_sk,ws_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ws_ship_date_sk,ws_sold_date_sk,sm_type,web_name,w_warehouse_name] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Project [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..59701e2d06 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/explain.txt @@ -0,0 +1,187 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.web_site (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet spark_catalog.default.date_dim (22) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_ship_mode_sk#3)) AND isnotnull(ws_web_site_sk#2)) AND isnotnull(ws_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#4] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_type#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] + +(16) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#10, web_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [web_site_sk#10, web_name#11] + +(18) Filter [codegen id : 3] +Input [2]: [web_site_sk#10, web_name#11] +Condition : isnotnull(web_site_sk#10) + +(19) BroadcastExchange +Input [2]: [web_site_sk#10, web_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_web_site_sk#2] +Right keys [1]: [web_site_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_site_sk#10, web_name#11] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_month_seq#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#12] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(26) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] +Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11, d_date_sk#12] + +(29) HashAggregate [codegen id : 5] +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] + +(30) Exchange +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, web_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 6] +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, web_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..197db771f3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [_groupingexpression,sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [_groupingexpression,sm_type,web_name] #1 + WholeStageCodegen (5) + HashAggregate [_groupingexpression,sm_type,web_name,ws_ship_date_sk,ws_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ws_ship_date_sk,ws_sold_date_sk,sm_type,web_name,w_warehouse_name] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Project [ws_ship_date_sk,ws_sold_date_sk,w_warehouse_name,sm_type,web_name] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_ship_date_sk,ws_web_site_sk,ws_sold_date_sk,w_warehouse_name,sm_type] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/explain.txt new file mode 100644 index 0000000000..f3769cf4aa --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet spark_catalog.default.store (18) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books ,Children ,Electronics ]),In(i_class, [personal ,portable ,refernece ,self-help ])),In(i_brand, [exportiunivamalg #6 ,scholaramalgamalg #7 ,scholaramalgamalg #8 ,scholaramalgamalg #6 ])),And(And(In(i_category, [Men ,Music ,Women ]),In(i_class, [accessories ,classical ,fragrances ,pants ])),In(i_brand, [amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,refernece ,self-help )) AND i_brand#2 IN (scholaramalgamalg #7 ,scholaramalgamalg #8 ,exportiunivamalg #6 ,scholaramalgamalg #6 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(8) BroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(11) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#14, d_moy#16] +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] + +(15) BroadcastExchange +Input [2]: [d_date_sk#14, d_moy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_moy#16] + +(18) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#17] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(21) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#17] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#16, sum#19] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN ((abs((sum_sales#21 - avg_monthly_sales#23)) / avg_monthly_sales#23) > 0.1000000000000000) ELSE false END + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/simplified.txt new file mode 100644 index 0000000000..57a82302ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen (7) + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (6) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (5) + HashAggregate [i_manager_id,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manager_id,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_manager_id,d_moy,ss_sales_price] [sum,sum] + Project [i_manager_id,ss_sales_price,d_moy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manager_id] + Filter [i_category,i_class,i_brand,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f3769cf4aa --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet spark_catalog.default.store (18) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books ,Children ,Electronics ]),In(i_class, [personal ,portable ,refernece ,self-help ])),In(i_brand, [exportiunivamalg #6 ,scholaramalgamalg #7 ,scholaramalgamalg #8 ,scholaramalgamalg #6 ])),And(And(In(i_category, [Men ,Music ,Women ]),In(i_class, [accessories ,classical ,fragrances ,pants ])),In(i_brand, [amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books ,Children ,Electronics ) AND i_class#3 IN (personal ,portable ,refernece ,self-help )) AND i_brand#2 IN (scholaramalgamalg #7 ,scholaramalgamalg #8 ,exportiunivamalg #6 ,scholaramalgamalg #6 )) OR ((i_category#4 IN (Women ,Music ,Men ) AND i_class#3 IN (accessories ,classical ,fragrances ,pants )) AND i_brand#2 IN (amalgimporto #9 ,edu packscholar #9 ,exportiimporto #9 ,importoamalg #9 ))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#13)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) + +(8) BroadcastExchange +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] + +(11) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] +Condition : (d_month_seq#15 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#14, d_moy#16] +Input [3]: [d_date_sk#14, d_month_seq#15, d_moy#16] + +(15) BroadcastExchange +Input [2]: [d_date_sk#14, d_moy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#14, d_moy#16] + +(18) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#17] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) + +(21) BroadcastExchange +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#17] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#16, sum#19] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Keys [2]: [i_manager_id#5, d_moy#16] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN ((abs((sum_sales#21 - avg_monthly_sales#23)) / avg_monthly_sales#23) > 0.1000000000000000) ELSE false END + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..57a82302ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen (7) + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen (6) + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen (5) + HashAggregate [i_manager_id,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manager_id,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_manager_id,d_moy,ss_sales_price] [sum,sum] + Project [i_manager_id,ss_sales_price,d_moy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manager_id] + Filter [i_category,i_class,i_brand,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_manager_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/explain.txt new file mode 100644 index 0000000000..9dce3ef15b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/explain.txt @@ -0,0 +1,999 @@ +== Physical Plan == +* Sort (179) ++- Exchange (178) + +- * Project (177) + +- * SortMergeJoin Inner (176) + :- * Sort (114) + : +- Exchange (113) + : +- * HashAggregate (112) + : +- * HashAggregate (111) + : +- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Project (103) + : : +- * BroadcastHashJoin Inner BuildRight (102) + : : :- * Project (100) + : : : +- * BroadcastHashJoin Inner BuildRight (99) + : : : :- * Project (94) + : : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : : :- * Project (91) + : : : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : : : :- * Project (85) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : : : : :- * Project (82) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : : : : : :- * Project (76) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : : : : : : :- * Project (70) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (69) + : : : : : : : : : :- * Project (67) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : : : : : : : : :- * Project (61) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : : : : : : : : :- * Project (58) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : : : : : : : : : :- * Project (52) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (51) + : : : : : : : : : : : : : :- * Project (46) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (45) + : : : : : : : : : : : : : : :- * Project (40) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : : : : : : : : : : : : : :- * Project (34) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (33) + : : : : : : : : : : : : : : : : :- * Sort (12) + : : : : : : : : : : : : : : : : : +- Exchange (11) + : : : : : : : : : : : : : : : : : +- * Project (10) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : : : : : : : : : : : :- BroadcastExchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Project (8) + : : : : : : : : : : : : : : : : : +- * Filter (7) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (6) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : : : : : : : : : : : : : : : +- * Sort (32) + : : : : : : : : : : : : : : : : +- * Project (31) + : : : : : : : : : : : : : : : : +- * Filter (30) + : : : : : : : : : : : : : : : : +- * HashAggregate (29) + : : : : : : : : : : : : : : : : +- Exchange (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- * Project (26) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (25) + : : : : : : : : : : : : : : : : :- * Sort (18) + : : : : : : : : : : : : : : : : : +- Exchange (17) + : : : : : : : : : : : : : : : : : +- * Project (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (24) + : : : : : : : : : : : : : : : : +- Exchange (23) + : : : : : : : : : : : : : : : : +- * Project (22) + : : : : : : : : : : : : : : : : +- * Filter (21) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (20) + : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : : : : : : : : : : : : : : : +- BroadcastExchange (38) + : : : : : : : : : : : : : : : +- * Filter (37) + : : : : : : : : : : : : : : : +- * ColumnarToRow (36) + : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (35) + : : : : : : : : : : : : : : +- BroadcastExchange (44) + : : : : : : : : : : : : : : +- * Filter (43) + : : : : : : : : : : : : : : +- * ColumnarToRow (42) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store (41) + : : : : : : : : : : : : : +- BroadcastExchange (50) + : : : : : : : : : : : : : +- * Filter (49) + : : : : : : : : : : : : : +- * ColumnarToRow (48) + : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.customer (47) + : : : : : : : : : : : : +- BroadcastExchange (56) + : : : : : : : : : : : : +- * Filter (55) + : : : : : : : : : : : : +- * ColumnarToRow (54) + : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (53) + : : : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : : : +- BroadcastExchange (65) + : : : : : : : : : : +- * Filter (64) + : : : : : : : : : : +- * ColumnarToRow (63) + : : : : : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (62) + : : : : : : : : : +- ReusedExchange (68) + : : : : : : : : +- BroadcastExchange (74) + : : : : : : : : +- * Filter (73) + : : : : : : : : +- * ColumnarToRow (72) + : : : : : : : : +- Scan parquet spark_catalog.default.promotion (71) + : : : : : : : +- BroadcastExchange (80) + : : : : : : : +- * Filter (79) + : : : : : : : +- * ColumnarToRow (78) + : : : : : : : +- Scan parquet spark_catalog.default.household_demographics (77) + : : : : : : +- ReusedExchange (83) + : : : : : +- BroadcastExchange (89) + : : : : : +- * Filter (88) + : : : : : +- * ColumnarToRow (87) + : : : : : +- Scan parquet spark_catalog.default.customer_address (86) + : : : : +- ReusedExchange (92) + : : : +- BroadcastExchange (98) + : : : +- * Filter (97) + : : : +- * ColumnarToRow (96) + : : : +- Scan parquet spark_catalog.default.income_band (95) + : : +- ReusedExchange (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet spark_catalog.default.item (104) + +- * Sort (175) + +- Exchange (174) + +- * HashAggregate (173) + +- * HashAggregate (172) + +- * Project (171) + +- * BroadcastHashJoin Inner BuildRight (170) + :- * Project (168) + : +- * BroadcastHashJoin Inner BuildRight (167) + : :- * Project (165) + : : +- * BroadcastHashJoin Inner BuildRight (164) + : : :- * Project (162) + : : : +- * BroadcastHashJoin Inner BuildRight (161) + : : : :- * Project (159) + : : : : +- * BroadcastHashJoin Inner BuildRight (158) + : : : : :- * Project (156) + : : : : : +- * BroadcastHashJoin Inner BuildRight (155) + : : : : : :- * Project (153) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : : : : :- * Project (150) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : : : : :- * Project (147) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : : : : :- * Project (144) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : : : : :- * Project (141) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : : : : :- * Project (138) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : : : : :- * Project (135) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : : : : :- * Project (132) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (122) + : : : : : : : : : : : : : : : :- * Sort (116) + : : : : : : : : : : : : : : : : +- ReusedExchange (115) + : : : : : : : : : : : : : : : +- * Sort (121) + : : : : : : : : : : : : : : : +- * Project (120) + : : : : : : : : : : : : : : : +- * Filter (119) + : : : : : : : : : : : : : : : +- * HashAggregate (118) + : : : : : : : : : : : : : : : +- ReusedExchange (117) + : : : : : : : : : : : : : : +- BroadcastExchange (127) + : : : : : : : : : : : : : : +- * Filter (126) + : : : : : : : : : : : : : : +- * ColumnarToRow (125) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (124) + : : : : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : : : : +- ReusedExchange (136) + : : : : : : : : : : +- ReusedExchange (139) + : : : : : : : : : +- ReusedExchange (142) + : : : : : : : : +- ReusedExchange (145) + : : : : : : : +- ReusedExchange (148) + : : : : : : +- ReusedExchange (151) + : : : : : +- ReusedExchange (154) + : : : : +- ReusedExchange (157) + : : : +- ReusedExchange (160) + : : +- ReusedExchange (163) + : +- ReusedExchange (166) + +- ReusedExchange (169) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(3) Filter [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(4) BroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [plan_id=1] + +(5) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(6) ColumnarToRow +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(7) Filter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(8) Project +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#8] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] + +(11) Exchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 3] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(15) Filter [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(16) Project [codegen id : 4] +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(17) Exchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 5] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(21) Filter [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(22) Project [codegen id : 6] +Output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(23) Exchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 7] +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 8] +Left keys [2]: [cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#20, cr_order_number#21] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 8] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(27) HashAggregate [codegen id : 8] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [3]: [sum#26, sum#27, isEmpty#28] +Results [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] + +(28) Exchange +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) HashAggregate [codegen id : 9] +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sale#34, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33 AS refund#35] + +(30) Filter [codegen id : 9] +Input [3]: [cs_item_sk#16, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(31) Project [codegen id : 9] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sale#34, refund#35] + +(32) Sort [codegen id : 9] +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [cs_item_sk#16] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] + +(35) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#36, d_year#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] + +(37) Filter [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] +Condition : ((isnotnull(d_year#37) AND (d_year#37 = 1999)) AND isnotnull(d_date_sk#36)) + +(38) BroadcastExchange +Input [2]: [d_date_sk#36, d_year#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#36, d_year#37] + +(41) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] + +(43) Filter [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Condition : ((isnotnull(s_store_sk#38) AND isnotnull(s_store_name#39)) AND isnotnull(s_zip#40)) + +(44) BroadcastExchange +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(45) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 25] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_sk#38, s_store_name#39, s_zip#40] + +(47) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(49) Filter [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Condition : (((((isnotnull(c_customer_sk#41) AND isnotnull(c_first_sales_date_sk#46)) AND isnotnull(c_first_shipto_date_sk#45)) AND isnotnull(c_current_cdemo_sk#42)) AND isnotnull(c_current_hdemo_sk#43)) AND isnotnull(c_current_addr_sk#44)) + +(50) BroadcastExchange +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(51) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#41] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(53) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#47, d_year#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] + +(55) Filter [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] +Condition : isnotnull(d_date_sk#47) + +(56) BroadcastExchange +Input [2]: [d_date_sk#47, d_year#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_sales_date_sk#46] +Right keys [1]: [d_date_sk#47] +Join type: Inner +Join condition: None + +(58) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46, d_date_sk#47, d_year#48] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#49, d_year#50] + +(60) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_shipto_date_sk#45] +Right keys [1]: [d_date_sk#49] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48, d_date_sk#49, d_year#50] + +(62) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#51, cd_marital_status#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] + +(64) Filter [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Condition : (isnotnull(cd_demo_sk#51) AND isnotnull(cd_marital_status#52)) + +(65) BroadcastExchange +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(66) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#51] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_demo_sk#51, cd_marital_status#52] + +(68) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#53, cd_marital_status#54] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_cdemo_sk#42] +Right keys [1]: [cd_demo_sk#53] +Join type: Inner +Join condition: NOT (cd_marital_status#52 = cd_marital_status#54) + +(70) Project [codegen id : 25] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52, cd_demo_sk#53, cd_marital_status#54] + +(71) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 17] +Input [1]: [p_promo_sk#55] + +(73) Filter [codegen id : 17] +Input [1]: [p_promo_sk#55] +Condition : isnotnull(p_promo_sk#55) + +(74) BroadcastExchange +Input [1]: [p_promo_sk#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=11] + +(75) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_promo_sk#7] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(76) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, p_promo_sk#55] + +(77) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(78) ColumnarToRow [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] + +(79) Filter [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Condition : (isnotnull(hd_demo_sk#56) AND isnotnull(hd_income_band_sk#57)) + +(80) BroadcastExchange +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(81) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#56] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_demo_sk#56, hd_income_band_sk#57] + +(83) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#58, hd_income_band_sk#59] + +(84) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_hdemo_sk#43] +Right keys [1]: [hd_demo_sk#58] +Join type: Inner +Join condition: None + +(85) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_demo_sk#58, hd_income_band_sk#59] + +(86) Scan parquet spark_catalog.default.customer_address +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(88) Filter [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Condition : isnotnull(ca_address_sk#60) + +(89) BroadcastExchange +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] + +(90) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_addr_sk#5] +Right keys [1]: [ca_address_sk#60] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(92) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(93) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_addr_sk#44] +Right keys [1]: [ca_address_sk#65] +Join type: Inner +Join condition: None + +(94) Project [codegen id : 25] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(95) Scan parquet spark_catalog.default.income_band +Output [1]: [ib_income_band_sk#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 22] +Input [1]: [ib_income_band_sk#70] + +(97) Filter [codegen id : 22] +Input [1]: [ib_income_band_sk#70] +Condition : isnotnull(ib_income_band_sk#70) + +(98) BroadcastExchange +Input [1]: [ib_income_band_sk#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(99) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#57] +Right keys [1]: [ib_income_band_sk#70] +Join type: Inner +Join condition: None + +(100) Project [codegen id : 25] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#70] + +(101) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#71] + +(102) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#59] +Right keys [1]: [ib_income_band_sk#71] +Join type: Inner +Join condition: None + +(103) Project [codegen id : 25] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#71] + +(104) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(106) Filter [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Condition : ((((((isnotnull(i_current_price#73) AND i_color#74 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#73 >= 64.00)) AND (i_current_price#73 <= 74.00)) AND (i_current_price#73 >= 65.00)) AND (i_current_price#73 <= 79.00)) AND isnotnull(i_item_sk#72)) + +(107) Project [codegen id : 24] +Output [2]: [i_item_sk#72, i_product_name#75] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(108) BroadcastExchange +Input [2]: [i_item_sk#72, i_product_name#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] + +(109) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(110) Project [codegen id : 25] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] + +(111) HashAggregate [codegen id : 25] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count#76, sum#77, sum#78, sum#79] +Results [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] + +(112) HashAggregate [codegen id : 25] +Input [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#9))#85, sum(UnscaledValue(ss_list_price#10))#86, sum(UnscaledValue(ss_coupon_amt#11))#87] +Results [17]: [i_product_name#75 AS product_name#88, i_item_sk#72 AS item_sk#89, s_store_name#39 AS store_name#90, s_zip#40 AS store_zip#91, ca_street_number#61 AS b_street_number#92, ca_street_name#62 AS b_streen_name#93, ca_city#63 AS b_city#94, ca_zip#64 AS b_zip#95, ca_street_number#66 AS c_street_number#96, ca_street_name#67 AS c_street_name#97, ca_city#68 AS c_city#98, ca_zip#69 AS c_zip#99, d_year#37 AS syear#100, count(1)#84 AS cnt#101, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#85,17,2) AS s1#102, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#86,17,2) AS s2#103, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#87,17,2) AS s3#104] + +(113) Exchange +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: hashpartitioning(item_sk#89, store_name#90, store_zip#91, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(114) Sort [codegen id : 26] +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: [item_sk#89 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, store_zip#91 ASC NULLS FIRST], false, 0 + +(115) ReusedExchange [Reuses operator id: 11] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] + +(116) Sort [codegen id : 29] +Input [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Arguments: [ss_item_sk#105 ASC NULLS FIRST], false, 0 + +(117) ReusedExchange [Reuses operator id: 28] +Output [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] + +(118) HashAggregate [codegen id : 35] +Input [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] +Keys [1]: [cs_item_sk#116] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#120)), sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#120))#32, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33] +Results [3]: [cs_item_sk#116, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#120))#32,17,2) AS sale#34, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33 AS refund#35] + +(119) Filter [codegen id : 35] +Input [3]: [cs_item_sk#116, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(120) Project [codegen id : 35] +Output [1]: [cs_item_sk#116] +Input [3]: [cs_item_sk#116, sale#34, refund#35] + +(121) Sort [codegen id : 35] +Input [1]: [cs_item_sk#116] +Arguments: [cs_item_sk#116 ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [cs_item_sk#116] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Input [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, cs_item_sk#116] + +(124) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#124, d_year#125] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] + +(126) Filter [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] +Condition : ((isnotnull(d_year#125) AND (d_year#125 = 2000)) AND isnotnull(d_date_sk#124)) + +(127) BroadcastExchange +Input [2]: [d_date_sk#124, d_year#125] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=17] + +(128) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#124] +Join type: Inner +Join condition: None + +(129) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125] +Input [13]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, d_date_sk#124, d_year#125] + +(130) ReusedExchange [Reuses operator id: 44] +Output [3]: [s_store_sk#126, s_store_name#127, s_zip#128] + +(131) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_store_sk#110] +Right keys [1]: [s_store_sk#126] +Join type: Inner +Join condition: None + +(132) Project [codegen id : 51] +Output [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128] +Input [14]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_sk#126, s_store_name#127, s_zip#128] + +(133) ReusedExchange [Reuses operator id: 50] +Output [6]: [c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(134) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_customer_sk#106] +Right keys [1]: [c_customer_sk#129] +Join type: Inner +Join condition: None + +(135) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] +Input [18]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(136) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#135, d_year#136] + +(137) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_sales_date_sk#134] +Right keys [1]: [d_date_sk#135] +Join type: Inner +Join condition: None + +(138) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134, d_date_sk#135, d_year#136] + +(139) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#137, d_year#138] + +(140) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_shipto_date_sk#133] +Right keys [1]: [d_date_sk#137] +Join type: Inner +Join condition: None + +(141) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136, d_date_sk#137, d_year#138] + +(142) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#139, cd_marital_status#140] + +(143) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_cdemo_sk#107] +Right keys [1]: [cd_demo_sk#139] +Join type: Inner +Join condition: None + +(144) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_demo_sk#139, cd_marital_status#140] + +(145) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#141, cd_marital_status#142] + +(146) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_cdemo_sk#130] +Right keys [1]: [cd_demo_sk#141] +Join type: Inner +Join condition: NOT (cd_marital_status#140 = cd_marital_status#142) + +(147) Project [codegen id : 51] +Output [14]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140, cd_demo_sk#141, cd_marital_status#142] + +(148) ReusedExchange [Reuses operator id: 74] +Output [1]: [p_promo_sk#143] + +(149) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_promo_sk#111] +Right keys [1]: [p_promo_sk#143] +Join type: Inner +Join condition: None + +(150) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, p_promo_sk#143] + +(151) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#144, hd_income_band_sk#145] + +(152) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_hdemo_sk#108] +Right keys [1]: [hd_demo_sk#144] +Join type: Inner +Join condition: None + +(153) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_demo_sk#144, hd_income_band_sk#145] + +(154) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#146, hd_income_band_sk#147] + +(155) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_hdemo_sk#131] +Right keys [1]: [hd_demo_sk#146] +Join type: Inner +Join condition: None + +(156) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147] +Input [15]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_demo_sk#146, hd_income_band_sk#147] + +(157) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(158) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_addr_sk#109] +Right keys [1]: [ca_address_sk#148] +Join type: Inner +Join condition: None + +(159) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] +Input [18]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(160) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(161) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_addr_sk#132] +Right keys [1]: [ca_address_sk#153] +Join type: Inner +Join condition: None + +(162) Project [codegen id : 51] +Output [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [21]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(163) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#158] + +(164) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#145] +Right keys [1]: [ib_income_band_sk#158] +Join type: Inner +Join condition: None + +(165) Project [codegen id : 51] +Output [18]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [20]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#158] + +(166) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#159] + +(167) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#147] +Right keys [1]: [ib_income_band_sk#159] +Join type: Inner +Join condition: None + +(168) Project [codegen id : 51] +Output [17]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#159] + +(169) ReusedExchange [Reuses operator id: 108] +Output [2]: [i_item_sk#160, i_product_name#161] + +(170) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [i_item_sk#160] +Join type: Inner +Join condition: None + +(171) Project [codegen id : 51] +Output [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] + +(172) HashAggregate [codegen id : 51] +Input [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#112)), partial_sum(UnscaledValue(ss_list_price#113)), partial_sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count#76, sum#162, sum#163, sum#164] +Results [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] + +(173) HashAggregate [codegen id : 51] +Input [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#112)), sum(UnscaledValue(ss_list_price#113)), sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#112))#85, sum(UnscaledValue(ss_list_price#113))#86, sum(UnscaledValue(ss_coupon_amt#114))#87] +Results [8]: [i_item_sk#160 AS item_sk#168, s_store_name#127 AS store_name#169, s_zip#128 AS store_zip#170, d_year#125 AS syear#171, count(1)#84 AS cnt#172, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#112))#85,17,2) AS s1#173, MakeDecimal(sum(UnscaledValue(ss_list_price#113))#86,17,2) AS s2#174, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#114))#87,17,2) AS s3#175] + +(174) Exchange +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: hashpartitioning(item_sk#168, store_name#169, store_zip#170, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(175) Sort [codegen id : 52] +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: [item_sk#168 ASC NULLS FIRST, store_name#169 ASC NULLS FIRST, store_zip#170 ASC NULLS FIRST], false, 0 + +(176) SortMergeJoin [codegen id : 53] +Left keys [3]: [item_sk#89, store_name#90, store_zip#91] +Right keys [3]: [item_sk#168, store_name#169, store_zip#170] +Join type: Inner +Join condition: (cnt#172 <= cnt#101) + +(177) Project [codegen id : 53] +Output [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Input [25]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] + +(178) Exchange +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: rangepartitioning(product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(179) Sort [codegen id : 54] +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: [product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/simplified.txt new file mode 100644 index 0000000000..26d89524e8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_datafusion/simplified.txt @@ -0,0 +1,270 @@ +WholeStageCodegen (54) + Sort [product_name,store_name,cnt] + InputAdapter + Exchange [product_name,store_name,cnt] #1 + WholeStageCodegen (53) + Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + SortMergeJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] + InputAdapter + WholeStageCodegen (26) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (25) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (2) + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (8) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #6 + WholeStageCodegen (4) + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #7 + WholeStageCodegen (6) + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + Filter [s_store_sk,s_store_name,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (12) + Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (13) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (18) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (20) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (22) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (24) + Project [i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + WholeStageCodegen (52) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #18 + WholeStageCodegen (51) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + InputAdapter + WholeStageCodegen (35) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (36) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [p_promo_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9dce3ef15b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/explain.txt @@ -0,0 +1,999 @@ +== Physical Plan == +* Sort (179) ++- Exchange (178) + +- * Project (177) + +- * SortMergeJoin Inner (176) + :- * Sort (114) + : +- Exchange (113) + : +- * HashAggregate (112) + : +- * HashAggregate (111) + : +- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Project (103) + : : +- * BroadcastHashJoin Inner BuildRight (102) + : : :- * Project (100) + : : : +- * BroadcastHashJoin Inner BuildRight (99) + : : : :- * Project (94) + : : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : : :- * Project (91) + : : : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : : : :- * Project (85) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : : : : :- * Project (82) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : : : : : :- * Project (76) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : : : : : : :- * Project (70) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (69) + : : : : : : : : : :- * Project (67) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : : : : : : : : :- * Project (61) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : : : : : : : : :- * Project (58) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : : : : : : : : : :- * Project (52) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (51) + : : : : : : : : : : : : : :- * Project (46) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (45) + : : : : : : : : : : : : : : :- * Project (40) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : : : : : : : : : : : : : :- * Project (34) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (33) + : : : : : : : : : : : : : : : : :- * Sort (12) + : : : : : : : : : : : : : : : : : +- Exchange (11) + : : : : : : : : : : : : : : : : : +- * Project (10) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : : : : : : : : : : : :- BroadcastExchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Project (8) + : : : : : : : : : : : : : : : : : +- * Filter (7) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (6) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : : : : : : : : : : : : : : : +- * Sort (32) + : : : : : : : : : : : : : : : : +- * Project (31) + : : : : : : : : : : : : : : : : +- * Filter (30) + : : : : : : : : : : : : : : : : +- * HashAggregate (29) + : : : : : : : : : : : : : : : : +- Exchange (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- * Project (26) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (25) + : : : : : : : : : : : : : : : : :- * Sort (18) + : : : : : : : : : : : : : : : : : +- Exchange (17) + : : : : : : : : : : : : : : : : : +- * Project (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (24) + : : : : : : : : : : : : : : : : +- Exchange (23) + : : : : : : : : : : : : : : : : +- * Project (22) + : : : : : : : : : : : : : : : : +- * Filter (21) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (20) + : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : : : : : : : : : : : : : : : +- BroadcastExchange (38) + : : : : : : : : : : : : : : : +- * Filter (37) + : : : : : : : : : : : : : : : +- * ColumnarToRow (36) + : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (35) + : : : : : : : : : : : : : : +- BroadcastExchange (44) + : : : : : : : : : : : : : : +- * Filter (43) + : : : : : : : : : : : : : : +- * ColumnarToRow (42) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store (41) + : : : : : : : : : : : : : +- BroadcastExchange (50) + : : : : : : : : : : : : : +- * Filter (49) + : : : : : : : : : : : : : +- * ColumnarToRow (48) + : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.customer (47) + : : : : : : : : : : : : +- BroadcastExchange (56) + : : : : : : : : : : : : +- * Filter (55) + : : : : : : : : : : : : +- * ColumnarToRow (54) + : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (53) + : : : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : : : +- BroadcastExchange (65) + : : : : : : : : : : +- * Filter (64) + : : : : : : : : : : +- * ColumnarToRow (63) + : : : : : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (62) + : : : : : : : : : +- ReusedExchange (68) + : : : : : : : : +- BroadcastExchange (74) + : : : : : : : : +- * Filter (73) + : : : : : : : : +- * ColumnarToRow (72) + : : : : : : : : +- Scan parquet spark_catalog.default.promotion (71) + : : : : : : : +- BroadcastExchange (80) + : : : : : : : +- * Filter (79) + : : : : : : : +- * ColumnarToRow (78) + : : : : : : : +- Scan parquet spark_catalog.default.household_demographics (77) + : : : : : : +- ReusedExchange (83) + : : : : : +- BroadcastExchange (89) + : : : : : +- * Filter (88) + : : : : : +- * ColumnarToRow (87) + : : : : : +- Scan parquet spark_catalog.default.customer_address (86) + : : : : +- ReusedExchange (92) + : : : +- BroadcastExchange (98) + : : : +- * Filter (97) + : : : +- * ColumnarToRow (96) + : : : +- Scan parquet spark_catalog.default.income_band (95) + : : +- ReusedExchange (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet spark_catalog.default.item (104) + +- * Sort (175) + +- Exchange (174) + +- * HashAggregate (173) + +- * HashAggregate (172) + +- * Project (171) + +- * BroadcastHashJoin Inner BuildRight (170) + :- * Project (168) + : +- * BroadcastHashJoin Inner BuildRight (167) + : :- * Project (165) + : : +- * BroadcastHashJoin Inner BuildRight (164) + : : :- * Project (162) + : : : +- * BroadcastHashJoin Inner BuildRight (161) + : : : :- * Project (159) + : : : : +- * BroadcastHashJoin Inner BuildRight (158) + : : : : :- * Project (156) + : : : : : +- * BroadcastHashJoin Inner BuildRight (155) + : : : : : :- * Project (153) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : : : : :- * Project (150) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : : : : :- * Project (147) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : : : : :- * Project (144) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : : : : :- * Project (141) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : : : : :- * Project (138) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : : : : :- * Project (135) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : : : : :- * Project (132) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (122) + : : : : : : : : : : : : : : : :- * Sort (116) + : : : : : : : : : : : : : : : : +- ReusedExchange (115) + : : : : : : : : : : : : : : : +- * Sort (121) + : : : : : : : : : : : : : : : +- * Project (120) + : : : : : : : : : : : : : : : +- * Filter (119) + : : : : : : : : : : : : : : : +- * HashAggregate (118) + : : : : : : : : : : : : : : : +- ReusedExchange (117) + : : : : : : : : : : : : : : +- BroadcastExchange (127) + : : : : : : : : : : : : : : +- * Filter (126) + : : : : : : : : : : : : : : +- * ColumnarToRow (125) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (124) + : : : : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : : : : +- ReusedExchange (136) + : : : : : : : : : : +- ReusedExchange (139) + : : : : : : : : : +- ReusedExchange (142) + : : : : : : : : +- ReusedExchange (145) + : : : : : : : +- ReusedExchange (148) + : : : : : : +- ReusedExchange (151) + : : : : : +- ReusedExchange (154) + : : : : +- ReusedExchange (157) + : : : +- ReusedExchange (160) + : : +- ReusedExchange (163) + : +- ReusedExchange (166) + +- ReusedExchange (169) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(3) Filter [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(4) BroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [plan_id=1] + +(5) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(6) ColumnarToRow +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(7) Filter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(8) Project +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#8] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] + +(11) Exchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 3] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(15) Filter [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(16) Project [codegen id : 4] +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(17) Exchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 5] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(21) Filter [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(22) Project [codegen id : 6] +Output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(23) Exchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 7] +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 8] +Left keys [2]: [cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#20, cr_order_number#21] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 8] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(27) HashAggregate [codegen id : 8] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [3]: [sum#26, sum#27, isEmpty#28] +Results [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] + +(28) Exchange +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) HashAggregate [codegen id : 9] +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sale#34, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33 AS refund#35] + +(30) Filter [codegen id : 9] +Input [3]: [cs_item_sk#16, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(31) Project [codegen id : 9] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sale#34, refund#35] + +(32) Sort [codegen id : 9] +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [cs_item_sk#16] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] + +(35) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#36, d_year#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] + +(37) Filter [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] +Condition : ((isnotnull(d_year#37) AND (d_year#37 = 1999)) AND isnotnull(d_date_sk#36)) + +(38) BroadcastExchange +Input [2]: [d_date_sk#36, d_year#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#36, d_year#37] + +(41) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] + +(43) Filter [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Condition : ((isnotnull(s_store_sk#38) AND isnotnull(s_store_name#39)) AND isnotnull(s_zip#40)) + +(44) BroadcastExchange +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(45) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 25] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_sk#38, s_store_name#39, s_zip#40] + +(47) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(49) Filter [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Condition : (((((isnotnull(c_customer_sk#41) AND isnotnull(c_first_sales_date_sk#46)) AND isnotnull(c_first_shipto_date_sk#45)) AND isnotnull(c_current_cdemo_sk#42)) AND isnotnull(c_current_hdemo_sk#43)) AND isnotnull(c_current_addr_sk#44)) + +(50) BroadcastExchange +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(51) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#41] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(53) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#47, d_year#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] + +(55) Filter [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] +Condition : isnotnull(d_date_sk#47) + +(56) BroadcastExchange +Input [2]: [d_date_sk#47, d_year#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_sales_date_sk#46] +Right keys [1]: [d_date_sk#47] +Join type: Inner +Join condition: None + +(58) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46, d_date_sk#47, d_year#48] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#49, d_year#50] + +(60) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_shipto_date_sk#45] +Right keys [1]: [d_date_sk#49] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48, d_date_sk#49, d_year#50] + +(62) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#51, cd_marital_status#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] + +(64) Filter [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Condition : (isnotnull(cd_demo_sk#51) AND isnotnull(cd_marital_status#52)) + +(65) BroadcastExchange +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(66) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#51] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_demo_sk#51, cd_marital_status#52] + +(68) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#53, cd_marital_status#54] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_cdemo_sk#42] +Right keys [1]: [cd_demo_sk#53] +Join type: Inner +Join condition: NOT (cd_marital_status#52 = cd_marital_status#54) + +(70) Project [codegen id : 25] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52, cd_demo_sk#53, cd_marital_status#54] + +(71) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 17] +Input [1]: [p_promo_sk#55] + +(73) Filter [codegen id : 17] +Input [1]: [p_promo_sk#55] +Condition : isnotnull(p_promo_sk#55) + +(74) BroadcastExchange +Input [1]: [p_promo_sk#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=11] + +(75) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_promo_sk#7] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(76) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, p_promo_sk#55] + +(77) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(78) ColumnarToRow [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] + +(79) Filter [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Condition : (isnotnull(hd_demo_sk#56) AND isnotnull(hd_income_band_sk#57)) + +(80) BroadcastExchange +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(81) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#56] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_demo_sk#56, hd_income_band_sk#57] + +(83) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#58, hd_income_band_sk#59] + +(84) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_hdemo_sk#43] +Right keys [1]: [hd_demo_sk#58] +Join type: Inner +Join condition: None + +(85) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_demo_sk#58, hd_income_band_sk#59] + +(86) Scan parquet spark_catalog.default.customer_address +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(88) Filter [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Condition : isnotnull(ca_address_sk#60) + +(89) BroadcastExchange +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] + +(90) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_addr_sk#5] +Right keys [1]: [ca_address_sk#60] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(92) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(93) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_addr_sk#44] +Right keys [1]: [ca_address_sk#65] +Join type: Inner +Join condition: None + +(94) Project [codegen id : 25] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(95) Scan parquet spark_catalog.default.income_band +Output [1]: [ib_income_band_sk#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 22] +Input [1]: [ib_income_band_sk#70] + +(97) Filter [codegen id : 22] +Input [1]: [ib_income_band_sk#70] +Condition : isnotnull(ib_income_band_sk#70) + +(98) BroadcastExchange +Input [1]: [ib_income_band_sk#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(99) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#57] +Right keys [1]: [ib_income_band_sk#70] +Join type: Inner +Join condition: None + +(100) Project [codegen id : 25] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#70] + +(101) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#71] + +(102) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#59] +Right keys [1]: [ib_income_band_sk#71] +Join type: Inner +Join condition: None + +(103) Project [codegen id : 25] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#71] + +(104) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(106) Filter [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Condition : ((((((isnotnull(i_current_price#73) AND i_color#74 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#73 >= 64.00)) AND (i_current_price#73 <= 74.00)) AND (i_current_price#73 >= 65.00)) AND (i_current_price#73 <= 79.00)) AND isnotnull(i_item_sk#72)) + +(107) Project [codegen id : 24] +Output [2]: [i_item_sk#72, i_product_name#75] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(108) BroadcastExchange +Input [2]: [i_item_sk#72, i_product_name#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] + +(109) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(110) Project [codegen id : 25] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] + +(111) HashAggregate [codegen id : 25] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count#76, sum#77, sum#78, sum#79] +Results [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] + +(112) HashAggregate [codegen id : 25] +Input [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#9))#85, sum(UnscaledValue(ss_list_price#10))#86, sum(UnscaledValue(ss_coupon_amt#11))#87] +Results [17]: [i_product_name#75 AS product_name#88, i_item_sk#72 AS item_sk#89, s_store_name#39 AS store_name#90, s_zip#40 AS store_zip#91, ca_street_number#61 AS b_street_number#92, ca_street_name#62 AS b_streen_name#93, ca_city#63 AS b_city#94, ca_zip#64 AS b_zip#95, ca_street_number#66 AS c_street_number#96, ca_street_name#67 AS c_street_name#97, ca_city#68 AS c_city#98, ca_zip#69 AS c_zip#99, d_year#37 AS syear#100, count(1)#84 AS cnt#101, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#85,17,2) AS s1#102, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#86,17,2) AS s2#103, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#87,17,2) AS s3#104] + +(113) Exchange +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: hashpartitioning(item_sk#89, store_name#90, store_zip#91, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(114) Sort [codegen id : 26] +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: [item_sk#89 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, store_zip#91 ASC NULLS FIRST], false, 0 + +(115) ReusedExchange [Reuses operator id: 11] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] + +(116) Sort [codegen id : 29] +Input [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Arguments: [ss_item_sk#105 ASC NULLS FIRST], false, 0 + +(117) ReusedExchange [Reuses operator id: 28] +Output [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] + +(118) HashAggregate [codegen id : 35] +Input [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] +Keys [1]: [cs_item_sk#116] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#120)), sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#120))#32, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33] +Results [3]: [cs_item_sk#116, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#120))#32,17,2) AS sale#34, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33 AS refund#35] + +(119) Filter [codegen id : 35] +Input [3]: [cs_item_sk#116, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(120) Project [codegen id : 35] +Output [1]: [cs_item_sk#116] +Input [3]: [cs_item_sk#116, sale#34, refund#35] + +(121) Sort [codegen id : 35] +Input [1]: [cs_item_sk#116] +Arguments: [cs_item_sk#116 ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [cs_item_sk#116] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Input [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, cs_item_sk#116] + +(124) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#124, d_year#125] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] + +(126) Filter [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] +Condition : ((isnotnull(d_year#125) AND (d_year#125 = 2000)) AND isnotnull(d_date_sk#124)) + +(127) BroadcastExchange +Input [2]: [d_date_sk#124, d_year#125] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=17] + +(128) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#124] +Join type: Inner +Join condition: None + +(129) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125] +Input [13]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, d_date_sk#124, d_year#125] + +(130) ReusedExchange [Reuses operator id: 44] +Output [3]: [s_store_sk#126, s_store_name#127, s_zip#128] + +(131) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_store_sk#110] +Right keys [1]: [s_store_sk#126] +Join type: Inner +Join condition: None + +(132) Project [codegen id : 51] +Output [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128] +Input [14]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_sk#126, s_store_name#127, s_zip#128] + +(133) ReusedExchange [Reuses operator id: 50] +Output [6]: [c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(134) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_customer_sk#106] +Right keys [1]: [c_customer_sk#129] +Join type: Inner +Join condition: None + +(135) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] +Input [18]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(136) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#135, d_year#136] + +(137) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_sales_date_sk#134] +Right keys [1]: [d_date_sk#135] +Join type: Inner +Join condition: None + +(138) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134, d_date_sk#135, d_year#136] + +(139) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#137, d_year#138] + +(140) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_shipto_date_sk#133] +Right keys [1]: [d_date_sk#137] +Join type: Inner +Join condition: None + +(141) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136, d_date_sk#137, d_year#138] + +(142) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#139, cd_marital_status#140] + +(143) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_cdemo_sk#107] +Right keys [1]: [cd_demo_sk#139] +Join type: Inner +Join condition: None + +(144) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_demo_sk#139, cd_marital_status#140] + +(145) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#141, cd_marital_status#142] + +(146) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_cdemo_sk#130] +Right keys [1]: [cd_demo_sk#141] +Join type: Inner +Join condition: NOT (cd_marital_status#140 = cd_marital_status#142) + +(147) Project [codegen id : 51] +Output [14]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140, cd_demo_sk#141, cd_marital_status#142] + +(148) ReusedExchange [Reuses operator id: 74] +Output [1]: [p_promo_sk#143] + +(149) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_promo_sk#111] +Right keys [1]: [p_promo_sk#143] +Join type: Inner +Join condition: None + +(150) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, p_promo_sk#143] + +(151) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#144, hd_income_band_sk#145] + +(152) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_hdemo_sk#108] +Right keys [1]: [hd_demo_sk#144] +Join type: Inner +Join condition: None + +(153) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_demo_sk#144, hd_income_band_sk#145] + +(154) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#146, hd_income_band_sk#147] + +(155) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_hdemo_sk#131] +Right keys [1]: [hd_demo_sk#146] +Join type: Inner +Join condition: None + +(156) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147] +Input [15]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_demo_sk#146, hd_income_band_sk#147] + +(157) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(158) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_addr_sk#109] +Right keys [1]: [ca_address_sk#148] +Join type: Inner +Join condition: None + +(159) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] +Input [18]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(160) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(161) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_addr_sk#132] +Right keys [1]: [ca_address_sk#153] +Join type: Inner +Join condition: None + +(162) Project [codegen id : 51] +Output [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [21]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(163) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#158] + +(164) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#145] +Right keys [1]: [ib_income_band_sk#158] +Join type: Inner +Join condition: None + +(165) Project [codegen id : 51] +Output [18]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [20]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#158] + +(166) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#159] + +(167) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#147] +Right keys [1]: [ib_income_band_sk#159] +Join type: Inner +Join condition: None + +(168) Project [codegen id : 51] +Output [17]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#159] + +(169) ReusedExchange [Reuses operator id: 108] +Output [2]: [i_item_sk#160, i_product_name#161] + +(170) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [i_item_sk#160] +Join type: Inner +Join condition: None + +(171) Project [codegen id : 51] +Output [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] + +(172) HashAggregate [codegen id : 51] +Input [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#112)), partial_sum(UnscaledValue(ss_list_price#113)), partial_sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count#76, sum#162, sum#163, sum#164] +Results [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] + +(173) HashAggregate [codegen id : 51] +Input [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#112)), sum(UnscaledValue(ss_list_price#113)), sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#112))#85, sum(UnscaledValue(ss_list_price#113))#86, sum(UnscaledValue(ss_coupon_amt#114))#87] +Results [8]: [i_item_sk#160 AS item_sk#168, s_store_name#127 AS store_name#169, s_zip#128 AS store_zip#170, d_year#125 AS syear#171, count(1)#84 AS cnt#172, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#112))#85,17,2) AS s1#173, MakeDecimal(sum(UnscaledValue(ss_list_price#113))#86,17,2) AS s2#174, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#114))#87,17,2) AS s3#175] + +(174) Exchange +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: hashpartitioning(item_sk#168, store_name#169, store_zip#170, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(175) Sort [codegen id : 52] +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: [item_sk#168 ASC NULLS FIRST, store_name#169 ASC NULLS FIRST, store_zip#170 ASC NULLS FIRST], false, 0 + +(176) SortMergeJoin [codegen id : 53] +Left keys [3]: [item_sk#89, store_name#90, store_zip#91] +Right keys [3]: [item_sk#168, store_name#169, store_zip#170] +Join type: Inner +Join condition: (cnt#172 <= cnt#101) + +(177) Project [codegen id : 53] +Output [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Input [25]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] + +(178) Exchange +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: rangepartitioning(product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(179) Sort [codegen id : 54] +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: [product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..26d89524e8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.native_iceberg_compat/simplified.txt @@ -0,0 +1,270 @@ +WholeStageCodegen (54) + Sort [product_name,store_name,cnt] + InputAdapter + Exchange [product_name,store_name,cnt] #1 + WholeStageCodegen (53) + Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + SortMergeJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] + InputAdapter + WholeStageCodegen (26) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (25) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (2) + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (8) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #6 + WholeStageCodegen (4) + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #7 + WholeStageCodegen (6) + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + Filter [s_store_sk,s_store_name,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (12) + Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (13) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (18) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (20) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (22) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (24) + Project [i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + WholeStageCodegen (52) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #18 + WholeStageCodegen (51) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + InputAdapter + WholeStageCodegen (35) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (36) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [p_promo_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/explain.txt new file mode 100644 index 0000000000..0da8f232f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/explain.txt @@ -0,0 +1,257 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store (1) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : +- BroadcastExchange (11) + : : +- * Project (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet spark_catalog.default.date_dim (7) + : +- BroadcastExchange (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + +- BroadcastExchange (40) + +- * Filter (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet spark_catalog.default.store_sales (27) + +- ReusedExchange (30) + + +(1) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#1, s_store_name#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] + +(3) Filter [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(6) Filter [codegen id : 2] +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : (isnotnull(ss_store_sk#4) AND isnotnull(ss_item_sk#3)) + +(7) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(9) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#7] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 2] +Output [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Input [5]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6, d_date_sk#7] + +(14) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#5))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] + +(15) Exchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(16) HashAggregate [codegen id : 3] +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#11] +Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#11,17,2) AS revenue#12] + +(17) Filter [codegen id : 3] +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Condition : isnotnull(revenue#12) + +(18) BroadcastExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [s_store_sk#1] +Right keys [1]: [ss_store_sk#4] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 9] +Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] + +(21) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(23) Filter [codegen id : 4] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Condition : isnotnull(i_item_sk#13) + +(24) BroadcastExchange +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12, i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(27) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#21)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 6] +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] + +(29) Filter [codegen id : 6] +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Condition : isnotnull(ss_store_sk#19) + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#22] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 6] +Output [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Input [5]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21, d_date_sk#22] + +(33) HashAggregate [codegen id : 6] +Input [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum#23] +Results [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] + +(34) Exchange +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Arguments: hashpartitioning(ss_store_sk#19, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(35) HashAggregate [codegen id : 7] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#20))#25] +Results [2]: [ss_store_sk#19, MakeDecimal(sum(UnscaledValue(ss_sales_price#20))#25,17,2) AS revenue#26] + +(36) HashAggregate [codegen id : 7] +Input [2]: [ss_store_sk#19, revenue#26] +Keys [1]: [ss_store_sk#19] +Functions [1]: [partial_avg(revenue#26)] +Aggregate Attributes [2]: [sum#27, count#28] +Results [3]: [ss_store_sk#19, sum#29, count#30] + +(37) Exchange +Input [3]: [ss_store_sk#19, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Keys [1]: [ss_store_sk#19] +Functions [1]: [avg(revenue#26)] +Aggregate Attributes [1]: [avg(revenue#26)#31] +Results [2]: [ss_store_sk#19, avg(revenue#26)#31 AS ave#32] + +(39) Filter [codegen id : 8] +Input [2]: [ss_store_sk#19, ave#32] +Condition : isnotnull(ave#32) + +(40) BroadcastExchange +Input [2]: [ss_store_sk#19, ave#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [ss_store_sk#19] +Join type: Inner +Join condition: (cast(revenue#12 as decimal(23,7)) <= (0.1 * ave#32)) + +(42) Project [codegen id : 9] +Output [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17, ss_store_sk#19, ave#32] + +(43) TakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST], [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6f9a1fee81 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + WholeStageCodegen (9) + Project [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] + Project [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_store_sk,ss_item_sk,revenue] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (3) + Filter [revenue] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Filter [ave] + HashAggregate [ss_store_sk,sum,count] [avg(revenue),ave,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen (7) + HashAggregate [ss_store_sk,revenue] [sum,count,sum,count] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #7 + WholeStageCodegen (6) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0da8f232f0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/explain.txt @@ -0,0 +1,257 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store (1) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : +- BroadcastExchange (11) + : : +- * Project (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet spark_catalog.default.date_dim (7) + : +- BroadcastExchange (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.item (21) + +- BroadcastExchange (40) + +- * Filter (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet spark_catalog.default.store_sales (27) + +- ReusedExchange (30) + + +(1) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#1, s_store_name#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] + +(3) Filter [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#6)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] + +(6) Filter [codegen id : 2] +Input [4]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : (isnotnull(ss_store_sk#4) AND isnotnull(ss_item_sk#3)) + +(7) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(9) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#7] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 2] +Output [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Input [5]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6, d_date_sk#7] + +(14) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#3, ss_store_sk#4, ss_sales_price#5] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#5))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] + +(15) Exchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(16) HashAggregate [codegen id : 3] +Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] +Keys [2]: [ss_store_sk#4, ss_item_sk#3] +Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#11] +Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#11,17,2) AS revenue#12] + +(17) Filter [codegen id : 3] +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Condition : isnotnull(revenue#12) + +(18) BroadcastExchange +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [s_store_sk#1] +Right keys [1]: [ss_store_sk#4] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 9] +Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] + +(21) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(23) Filter [codegen id : 4] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Condition : isnotnull(i_item_sk#13) + +(24) BroadcastExchange +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12, i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] + +(27) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#21)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 6] +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] + +(29) Filter [codegen id : 6] +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Condition : isnotnull(ss_store_sk#19) + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#22] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 6] +Output [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Input [5]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21, d_date_sk#22] + +(33) HashAggregate [codegen id : 6] +Input [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum#23] +Results [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] + +(34) Exchange +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Arguments: hashpartitioning(ss_store_sk#19, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(35) HashAggregate [codegen id : 7] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#20))#25] +Results [2]: [ss_store_sk#19, MakeDecimal(sum(UnscaledValue(ss_sales_price#20))#25,17,2) AS revenue#26] + +(36) HashAggregate [codegen id : 7] +Input [2]: [ss_store_sk#19, revenue#26] +Keys [1]: [ss_store_sk#19] +Functions [1]: [partial_avg(revenue#26)] +Aggregate Attributes [2]: [sum#27, count#28] +Results [3]: [ss_store_sk#19, sum#29, count#30] + +(37) Exchange +Input [3]: [ss_store_sk#19, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Keys [1]: [ss_store_sk#19] +Functions [1]: [avg(revenue#26)] +Aggregate Attributes [1]: [avg(revenue#26)#31] +Results [2]: [ss_store_sk#19, avg(revenue#26)#31 AS ave#32] + +(39) Filter [codegen id : 8] +Input [2]: [ss_store_sk#19, ave#32] +Condition : isnotnull(ave#32) + +(40) BroadcastExchange +Input [2]: [ss_store_sk#19, ave#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [ss_store_sk#19] +Join type: Inner +Join condition: (cast(revenue#12 as decimal(23,7)) <= (0.1 * ave#32)) + +(42) Project [codegen id : 9] +Output [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17, ss_store_sk#19, ave#32] + +(43) TakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST], [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6f9a1fee81 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.native_iceberg_compat/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + WholeStageCodegen (9) + Project [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] + Project [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_store_sk,ss_item_sk,revenue] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (3) + Filter [revenue] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Filter [ave] + HashAggregate [ss_store_sk,sum,count] [avg(revenue),ave,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen (7) + HashAggregate [ss_store_sk,revenue] [sum,count,sum,count] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #7 + WholeStageCodegen (6) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/explain.txt new file mode 100644 index 0000000000..284cecf7d9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/explain.txt @@ -0,0 +1,320 @@ +== Physical Plan == +TakeOrderedAndProject (55) ++- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- Union (51) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.warehouse (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.time_dim (16) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.ship_mode (23) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- ReusedExchange (45) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#7)] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] + +(3) Filter [codegen id : 5] +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_warehouse_sk#3) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_ship_mode_sk#2)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) Filter [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(7) BroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#8] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [12]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_date_sk#7] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#15, d_year#16, d_moy#17] + +(16) Scan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#18, t_time#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [t_time_sk#18, t_time#19] + +(18) Filter [codegen id : 3] +Input [2]: [t_time_sk#18, t_time#19] +Condition : (((isnotnull(t_time#19) AND (t_time#19 >= 30838)) AND (t_time#19 <= 59638)) AND isnotnull(t_time_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [t_time_sk#18] +Input [2]: [t_time_sk#18, t_time#19] + +(20) BroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#18] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, t_time_sk#18] + +(23) Scan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [In(sm_carrier, [BARIAN ,DHL ]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] + +(25) Filter [codegen id : 4] +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Condition : (sm_carrier#21 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#20)) + +(26) Project [codegen id : 4] +Output [1]: [sm_ship_mode_sk#20] +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] + +(27) BroadcastExchange +Input [1]: [sm_ship_mode_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#2] +Right keys [1]: [sm_ship_mode_sk#20] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, sm_ship_mode_sk#20] + +(30) HashAggregate [codegen id : 5] +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] + +(31) Exchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#118, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#119, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#120, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#121, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#122, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#141] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#142, d_year#16 AS year#143, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#118 AS jan_sales#144, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#119 AS feb_sales#145, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#120 AS mar_sales#146, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#121 AS apr_sales#147, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#122 AS may_sales#148, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#123 AS jun_sales#149, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#124 AS jul_sales#150, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#125 AS aug_sales#151, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#126 AS sep_sales#152, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#127 AS oct_sales#153, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#128 AS nov_sales#154, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#129 AS dec_sales#155, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#130 AS jan_net#156, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#131 AS feb_net#157, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#132 AS mar_net#158, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#133 AS apr_net#159, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#134 AS may_net#160, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#135 AS jun_net#161, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#136 AS jul_net#162, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#137 AS aug_net#163, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#138 AS sep_net#164, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#139 AS oct_net#165, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#140 AS nov_net#166, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#141 AS dec_net#167] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#174)] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [7]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174] + +(35) Filter [codegen id : 11] +Input [7]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174] +Condition : ((isnotnull(cs_warehouse_sk#170) AND isnotnull(cs_sold_time_sk#168)) AND isnotnull(cs_ship_mode_sk#169)) + +(36) ReusedExchange [Reuses operator id: 7] +Output [7]: [w_warehouse_sk#175, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_warehouse_sk#170] +Right keys [1]: [w_warehouse_sk#175] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [12]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181] +Input [14]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174, w_warehouse_sk#175, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181] + +(39) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#182, d_year#183, d_moy#184] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#174] +Right keys [1]: [d_date_sk#182] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [13]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Input [15]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_date_sk#182, d_year#183, d_moy#184] + +(42) ReusedExchange [Reuses operator id: 20] +Output [1]: [t_time_sk#185] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_time_sk#168] +Right keys [1]: [t_time_sk#185] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [12]: [cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Input [14]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184, t_time_sk#185] + +(45) ReusedExchange [Reuses operator id: 27] +Output [1]: [sm_ship_mode_sk#186] + +(46) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_mode_sk#169] +Right keys [1]: [sm_ship_mode_sk#186] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 11] +Output [11]: [cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Input [13]: [cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184, sm_ship_mode_sk#186] + +(48) HashAggregate [codegen id : 11] +Input [11]: [cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Keys [7]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183] +Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#187, isEmpty#188, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228, sum#229, isEmpty#230, sum#231, isEmpty#232, sum#233, isEmpty#234] +Results [55]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260, sum#261, isEmpty#262, sum#263, isEmpty#264, sum#265, isEmpty#266, sum#267, isEmpty#268, sum#269, isEmpty#270, sum#271, isEmpty#272, sum#273, isEmpty#274, sum#275, isEmpty#276, sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282] + +(49) Exchange +Input [55]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260, sum#261, isEmpty#262, sum#263, isEmpty#264, sum#265, isEmpty#266, sum#267, isEmpty#268, sum#269, isEmpty#270, sum#271, isEmpty#272, sum#273, isEmpty#274, sum#275, isEmpty#276, sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282] +Arguments: hashpartitioning(w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(50) HashAggregate [codegen id : 12] +Input [55]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260, sum#261, isEmpty#262, sum#263, isEmpty#264, sum#265, isEmpty#266, sum#267, isEmpty#268, sum#269, isEmpty#270, sum#271, isEmpty#272, sum#273, isEmpty#274, sum#275, isEmpty#276, sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282] +Keys [7]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183] +Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#283, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#306] +Results [32]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, DHL,BARIAN AS ship_carriers#307, d_year#183 AS year#308, sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#283 AS jan_sales#309, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#284 AS feb_sales#310, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#285 AS mar_sales#311, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#286 AS apr_sales#312, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#287 AS may_sales#313, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#288 AS jun_sales#314, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#289 AS jul_sales#315, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#290 AS aug_sales#316, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#291 AS sep_sales#317, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#292 AS oct_sales#318, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#293 AS nov_sales#319, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#294 AS dec_sales#320, sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#295 AS jan_net#321, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#296 AS feb_net#322, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#297 AS mar_net#323, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#298 AS apr_net#324, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#299 AS may_net#325, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#300 AS jun_net#326, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#301 AS jul_net#327, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#302 AS aug_net#328, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#303 AS sep_net#329, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#304 AS oct_net#330, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#305 AS nov_net#331, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#306 AS dec_net#332] + +(51) Union + +(52) HashAggregate [codegen id : 13] +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#144, feb_sales#145, mar_sales#146, apr_sales#147, may_sales#148, jun_sales#149, jul_sales#150, aug_sales#151, sep_sales#152, oct_sales#153, nov_sales#154, dec_sales#155, jan_net#156, feb_net#157, mar_net#158, apr_net#159, may_net#160, jun_net#161, jul_net#162, aug_net#163, sep_net#164, oct_net#165, nov_net#166, dec_net#167] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143] +Functions [36]: [partial_sum(jan_sales#144), partial_sum(feb_sales#145), partial_sum(mar_sales#146), partial_sum(apr_sales#147), partial_sum(may_sales#148), partial_sum(jun_sales#149), partial_sum(jul_sales#150), partial_sum(aug_sales#151), partial_sum(sep_sales#152), partial_sum(oct_sales#153), partial_sum(nov_sales#154), partial_sum(dec_sales#155), partial_sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum(jan_net#156), partial_sum(feb_net#157), partial_sum(mar_net#158), partial_sum(apr_net#159), partial_sum(may_net#160), partial_sum(jun_net#161), partial_sum(jul_net#162), partial_sum(aug_net#163), partial_sum(sep_net#164), partial_sum(oct_net#165), partial_sum(nov_net#166), partial_sum(dec_net#167)] +Aggregate Attributes [72]: [sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404] +Results [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476] + +(53) Exchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(54) HashAggregate [codegen id : 14] +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143] +Functions [36]: [sum(jan_sales#144), sum(feb_sales#145), sum(mar_sales#146), sum(apr_sales#147), sum(may_sales#148), sum(jun_sales#149), sum(jul_sales#150), sum(aug_sales#151), sum(sep_sales#152), sum(oct_sales#153), sum(nov_sales#154), sum(dec_sales#155), sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum(jan_net#156), sum(feb_net#157), sum(mar_net#158), sum(apr_net#159), sum(may_net#160), sum(jun_net#161), sum(jul_net#162), sum(aug_net#163), sum(sep_net#164), sum(oct_net#165), sum(nov_net#166), sum(dec_net#167)] +Aggregate Attributes [36]: [sum(jan_sales#144)#477, sum(feb_sales#145)#478, sum(mar_sales#146)#479, sum(apr_sales#147)#480, sum(may_sales#148)#481, sum(jun_sales#149)#482, sum(jul_sales#150)#483, sum(aug_sales#151)#484, sum(sep_sales#152)#485, sum(oct_sales#153)#486, sum(nov_sales#154)#487, sum(dec_sales#155)#488, sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#489, sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#490, sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#491, sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#492, sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#493, sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#494, sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#495, sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#496, sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#497, sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#498, sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#499, sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#500, sum(jan_net#156)#501, sum(feb_net#157)#502, sum(mar_net#158)#503, sum(apr_net#159)#504, sum(may_net#160)#505, sum(jun_net#161)#506, sum(jul_net#162)#507, sum(aug_net#163)#508, sum(sep_net#164)#509, sum(oct_net#165)#510, sum(nov_net#166)#511, sum(dec_net#167)#512] +Results [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum(jan_sales#144)#477 AS jan_sales#513, sum(feb_sales#145)#478 AS feb_sales#514, sum(mar_sales#146)#479 AS mar_sales#515, sum(apr_sales#147)#480 AS apr_sales#516, sum(may_sales#148)#481 AS may_sales#517, sum(jun_sales#149)#482 AS jun_sales#518, sum(jul_sales#150)#483 AS jul_sales#519, sum(aug_sales#151)#484 AS aug_sales#520, sum(sep_sales#152)#485 AS sep_sales#521, sum(oct_sales#153)#486 AS oct_sales#522, sum(nov_sales#154)#487 AS nov_sales#523, sum(dec_sales#155)#488 AS dec_sales#524, sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#489 AS jan_sales_per_sq_foot#525, sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#490 AS feb_sales_per_sq_foot#526, sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#491 AS mar_sales_per_sq_foot#527, sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#492 AS apr_sales_per_sq_foot#528, sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#493 AS may_sales_per_sq_foot#529, sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#494 AS jun_sales_per_sq_foot#530, sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#495 AS jul_sales_per_sq_foot#531, sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#496 AS aug_sales_per_sq_foot#532, sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#497 AS sep_sales_per_sq_foot#533, sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#498 AS oct_sales_per_sq_foot#534, sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#499 AS nov_sales_per_sq_foot#535, sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#500 AS dec_sales_per_sq_foot#536, sum(jan_net#156)#501 AS jan_net#537, sum(feb_net#157)#502 AS feb_net#538, sum(mar_net#158)#503 AS mar_net#539, sum(apr_net#159)#504 AS apr_net#540, sum(may_net#160)#505 AS may_net#541, sum(jun_net#161)#506 AS jun_net#542, sum(jul_net#162)#507 AS jul_net#543, sum(aug_net#163)#508 AS aug_net#544, sum(sep_net#164)#509 AS sep_net#545, sum(oct_net#165)#510 AS oct_net#546, sum(nov_net#166)#511 AS nov_net#547, sum(dec_net#167)#512 AS dec_net#548] + +(55) TakeOrderedAndProject +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#513, feb_sales#514, mar_sales#515, apr_sales#516, may_sales#517, jun_sales#518, jul_sales#519, aug_sales#520, sep_sales#521, oct_sales#522, nov_sales#523, dec_sales#524, jan_sales_per_sq_foot#525, feb_sales_per_sq_foot#526, mar_sales_per_sq_foot#527, apr_sales_per_sq_foot#528, may_sales_per_sq_foot#529, jun_sales_per_sq_foot#530, jul_sales_per_sq_foot#531, aug_sales_per_sq_foot#532, sep_sales_per_sq_foot#533, oct_sales_per_sq_foot#534, nov_sales_per_sq_foot#535, dec_sales_per_sq_foot#536, jan_net#537, feb_net#538, mar_net#539, apr_net#540, may_net#541, jun_net#542, jul_net#543, aug_net#544, sep_net#545, oct_net#546, nov_net#547, dec_net#548] +Arguments: 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#513, feb_sales#514, mar_sales#515, apr_sales#516, may_sales#517, jun_sales#518, jul_sales#519, aug_sales#520, sep_sales#521, oct_sales#522, nov_sales#523, dec_sales#524, jan_sales_per_sq_foot#525, feb_sales_per_sq_foot#526, mar_sales_per_sq_foot#527, apr_sales_per_sq_foot#528, may_sales_per_sq_foot#529, jun_sales_per_sq_foot#530, jul_sales_per_sq_foot#531, aug_sales_per_sq_foot#532, sep_sales_per_sq_foot#533, oct_sales_per_sq_foot#534, nov_sales_per_sq_foot#535, dec_sales_per_sq_foot#536, jan_net#537, feb_net#538, mar_net#539, apr_net#540, may_net#541, jun_net#542, jul_net#543, aug_net#544, sep_net#545, oct_net#546, nov_net#547, dec_net#548] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9a2e8adb87 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_datafusion/simplified.txt @@ -0,0 +1,83 @@ +TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + WholeStageCodegen (14) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum((jan_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((feb_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((mar_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((apr_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((may_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jun_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jul_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((aug_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((sep_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((oct_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((nov_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((dec_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 + WholeStageCodegen (13) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 + WholeStageCodegen (5) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,ws_ext_sales_price,ws_quantity,ws_net_paid] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Filter [ws_warehouse_sk,ws_sold_time_sk,ws_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [t_time_sk] + Filter [t_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_time] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [sm_ship_mode_sk] + Filter [sm_carrier,sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_carrier] + WholeStageCodegen (12) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 + WholeStageCodegen (11) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,cs_sales_price,cs_quantity,cs_net_paid_inc_tax] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [cs_sold_time_sk,t_time_sk] + Project [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Filter [cs_warehouse_sk,cs_sold_time_sk,cs_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk] + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_moy] #4 + InputAdapter + ReusedExchange [t_time_sk] #5 + InputAdapter + ReusedExchange [sm_ship_mode_sk] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..284cecf7d9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/explain.txt @@ -0,0 +1,320 @@ +== Physical Plan == +TakeOrderedAndProject (55) ++- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- Union (51) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.warehouse (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.time_dim (16) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.ship_mode (23) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- ReusedExchange (45) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#7)] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] + +(3) Filter [codegen id : 5] +Input [7]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_warehouse_sk#3) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_ship_mode_sk#2)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) Filter [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(7) BroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#8] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [12]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_date_sk#7] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#15, d_year#16, d_moy#17] + +(16) Scan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#18, t_time#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [t_time_sk#18, t_time#19] + +(18) Filter [codegen id : 3] +Input [2]: [t_time_sk#18, t_time#19] +Condition : (((isnotnull(t_time#19) AND (t_time#19 >= 30838)) AND (t_time#19 <= 59638)) AND isnotnull(t_time_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [t_time_sk#18] +Input [2]: [t_time_sk#18, t_time#19] + +(20) BroadcastExchange +Input [1]: [t_time_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#18] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, t_time_sk#18] + +(23) Scan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [In(sm_carrier, [BARIAN ,DHL ]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] + +(25) Filter [codegen id : 4] +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] +Condition : (sm_carrier#21 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#20)) + +(26) Project [codegen id : 4] +Output [1]: [sm_ship_mode_sk#20] +Input [2]: [sm_ship_mode_sk#20, sm_carrier#21] + +(27) BroadcastExchange +Input [1]: [sm_ship_mode_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#2] +Right keys [1]: [sm_ship_mode_sk#20] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17, sm_ship_mode_sk#20] + +(30) HashAggregate [codegen id : 5] +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, d_moy#17] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#22, isEmpty#23, sum#24, isEmpty#25, sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] + +(31) Exchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16, sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#16] +Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#118, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#119, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#120, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#121, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#122, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#141] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#142, d_year#16 AS year#143, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#118 AS jan_sales#144, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#119 AS feb_sales#145, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#120 AS mar_sales#146, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#121 AS apr_sales#147, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#122 AS may_sales#148, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#123 AS jun_sales#149, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#124 AS jul_sales#150, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#125 AS aug_sales#151, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#126 AS sep_sales#152, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#127 AS oct_sales#153, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#128 AS nov_sales#154, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_ext_sales_price#5 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#129 AS dec_sales#155, sum(CASE WHEN (d_moy#17 = 1) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#130 AS jan_net#156, sum(CASE WHEN (d_moy#17 = 2) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#131 AS feb_net#157, sum(CASE WHEN (d_moy#17 = 3) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#132 AS mar_net#158, sum(CASE WHEN (d_moy#17 = 4) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#133 AS apr_net#159, sum(CASE WHEN (d_moy#17 = 5) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#134 AS may_net#160, sum(CASE WHEN (d_moy#17 = 6) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#135 AS jun_net#161, sum(CASE WHEN (d_moy#17 = 7) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#136 AS jul_net#162, sum(CASE WHEN (d_moy#17 = 8) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#137 AS aug_net#163, sum(CASE WHEN (d_moy#17 = 9) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#138 AS sep_net#164, sum(CASE WHEN (d_moy#17 = 10) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#139 AS oct_net#165, sum(CASE WHEN (d_moy#17 = 11) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#140 AS nov_net#166, sum(CASE WHEN (d_moy#17 = 12) THEN (ws_net_paid#6 * cast(ws_quantity#4 as decimal(10,0))) ELSE 0.00 END)#141 AS dec_net#167] + +(33) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#174)] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [7]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174] + +(35) Filter [codegen id : 11] +Input [7]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174] +Condition : ((isnotnull(cs_warehouse_sk#170) AND isnotnull(cs_sold_time_sk#168)) AND isnotnull(cs_ship_mode_sk#169)) + +(36) ReusedExchange [Reuses operator id: 7] +Output [7]: [w_warehouse_sk#175, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_warehouse_sk#170] +Right keys [1]: [w_warehouse_sk#175] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [12]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181] +Input [14]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_warehouse_sk#170, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174, w_warehouse_sk#175, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181] + +(39) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#182, d_year#183, d_moy#184] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#174] +Right keys [1]: [d_date_sk#182] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [13]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Input [15]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, cs_sold_date_sk#174, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_date_sk#182, d_year#183, d_moy#184] + +(42) ReusedExchange [Reuses operator id: 20] +Output [1]: [t_time_sk#185] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_time_sk#168] +Right keys [1]: [t_time_sk#185] +Join type: Inner +Join condition: None + +(44) Project [codegen id : 11] +Output [12]: [cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Input [14]: [cs_sold_time_sk#168, cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184, t_time_sk#185] + +(45) ReusedExchange [Reuses operator id: 27] +Output [1]: [sm_ship_mode_sk#186] + +(46) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_mode_sk#169] +Right keys [1]: [sm_ship_mode_sk#186] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 11] +Output [11]: [cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Input [13]: [cs_ship_mode_sk#169, cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184, sm_ship_mode_sk#186] + +(48) HashAggregate [codegen id : 11] +Input [11]: [cs_quantity#171, cs_sales_price#172, cs_net_paid_inc_tax#173, w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, d_moy#184] +Keys [7]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183] +Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#187, isEmpty#188, sum#189, isEmpty#190, sum#191, isEmpty#192, sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206, sum#207, isEmpty#208, sum#209, isEmpty#210, sum#211, isEmpty#212, sum#213, isEmpty#214, sum#215, isEmpty#216, sum#217, isEmpty#218, sum#219, isEmpty#220, sum#221, isEmpty#222, sum#223, isEmpty#224, sum#225, isEmpty#226, sum#227, isEmpty#228, sum#229, isEmpty#230, sum#231, isEmpty#232, sum#233, isEmpty#234] +Results [55]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260, sum#261, isEmpty#262, sum#263, isEmpty#264, sum#265, isEmpty#266, sum#267, isEmpty#268, sum#269, isEmpty#270, sum#271, isEmpty#272, sum#273, isEmpty#274, sum#275, isEmpty#276, sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282] + +(49) Exchange +Input [55]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260, sum#261, isEmpty#262, sum#263, isEmpty#264, sum#265, isEmpty#266, sum#267, isEmpty#268, sum#269, isEmpty#270, sum#271, isEmpty#272, sum#273, isEmpty#274, sum#275, isEmpty#276, sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282] +Arguments: hashpartitioning(w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(50) HashAggregate [codegen id : 12] +Input [55]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183, sum#235, isEmpty#236, sum#237, isEmpty#238, sum#239, isEmpty#240, sum#241, isEmpty#242, sum#243, isEmpty#244, sum#245, isEmpty#246, sum#247, isEmpty#248, sum#249, isEmpty#250, sum#251, isEmpty#252, sum#253, isEmpty#254, sum#255, isEmpty#256, sum#257, isEmpty#258, sum#259, isEmpty#260, sum#261, isEmpty#262, sum#263, isEmpty#264, sum#265, isEmpty#266, sum#267, isEmpty#268, sum#269, isEmpty#270, sum#271, isEmpty#272, sum#273, isEmpty#274, sum#275, isEmpty#276, sum#277, isEmpty#278, sum#279, isEmpty#280, sum#281, isEmpty#282] +Keys [7]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, d_year#183] +Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#283, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#306] +Results [32]: [w_warehouse_name#176, w_warehouse_sq_ft#177, w_city#178, w_county#179, w_state#180, w_country#181, DHL,BARIAN AS ship_carriers#307, d_year#183 AS year#308, sum(CASE WHEN (d_moy#184 = 1) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#283 AS jan_sales#309, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#284 AS feb_sales#310, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#285 AS mar_sales#311, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#286 AS apr_sales#312, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#287 AS may_sales#313, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#288 AS jun_sales#314, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#289 AS jul_sales#315, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#290 AS aug_sales#316, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#291 AS sep_sales#317, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#292 AS oct_sales#318, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#293 AS nov_sales#319, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_sales_price#172 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#294 AS dec_sales#320, sum(CASE WHEN (d_moy#184 = 1) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#295 AS jan_net#321, sum(CASE WHEN (d_moy#184 = 2) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#296 AS feb_net#322, sum(CASE WHEN (d_moy#184 = 3) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#297 AS mar_net#323, sum(CASE WHEN (d_moy#184 = 4) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#298 AS apr_net#324, sum(CASE WHEN (d_moy#184 = 5) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#299 AS may_net#325, sum(CASE WHEN (d_moy#184 = 6) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#300 AS jun_net#326, sum(CASE WHEN (d_moy#184 = 7) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#301 AS jul_net#327, sum(CASE WHEN (d_moy#184 = 8) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#302 AS aug_net#328, sum(CASE WHEN (d_moy#184 = 9) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#303 AS sep_net#329, sum(CASE WHEN (d_moy#184 = 10) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#304 AS oct_net#330, sum(CASE WHEN (d_moy#184 = 11) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#305 AS nov_net#331, sum(CASE WHEN (d_moy#184 = 12) THEN (cs_net_paid_inc_tax#173 * cast(cs_quantity#171 as decimal(10,0))) ELSE 0.00 END)#306 AS dec_net#332] + +(51) Union + +(52) HashAggregate [codegen id : 13] +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#144, feb_sales#145, mar_sales#146, apr_sales#147, may_sales#148, jun_sales#149, jul_sales#150, aug_sales#151, sep_sales#152, oct_sales#153, nov_sales#154, dec_sales#155, jan_net#156, feb_net#157, mar_net#158, apr_net#159, may_net#160, jun_net#161, jul_net#162, aug_net#163, sep_net#164, oct_net#165, nov_net#166, dec_net#167] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143] +Functions [36]: [partial_sum(jan_sales#144), partial_sum(feb_sales#145), partial_sum(mar_sales#146), partial_sum(apr_sales#147), partial_sum(may_sales#148), partial_sum(jun_sales#149), partial_sum(jul_sales#150), partial_sum(aug_sales#151), partial_sum(sep_sales#152), partial_sum(oct_sales#153), partial_sum(nov_sales#154), partial_sum(dec_sales#155), partial_sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), partial_sum(jan_net#156), partial_sum(feb_net#157), partial_sum(mar_net#158), partial_sum(apr_net#159), partial_sum(may_net#160), partial_sum(jun_net#161), partial_sum(jul_net#162), partial_sum(aug_net#163), partial_sum(sep_net#164), partial_sum(oct_net#165), partial_sum(nov_net#166), partial_sum(dec_net#167)] +Aggregate Attributes [72]: [sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404] +Results [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476] + +(53) Exchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(54) HashAggregate [codegen id : 14] +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143] +Functions [36]: [sum(jan_sales#144), sum(feb_sales#145), sum(mar_sales#146), sum(apr_sales#147), sum(may_sales#148), sum(jun_sales#149), sum(jul_sales#150), sum(aug_sales#151), sum(sep_sales#152), sum(oct_sales#153), sum(nov_sales#154), sum(dec_sales#155), sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0)))), sum(jan_net#156), sum(feb_net#157), sum(mar_net#158), sum(apr_net#159), sum(may_net#160), sum(jun_net#161), sum(jul_net#162), sum(aug_net#163), sum(sep_net#164), sum(oct_net#165), sum(nov_net#166), sum(dec_net#167)] +Aggregate Attributes [36]: [sum(jan_sales#144)#477, sum(feb_sales#145)#478, sum(mar_sales#146)#479, sum(apr_sales#147)#480, sum(may_sales#148)#481, sum(jun_sales#149)#482, sum(jul_sales#150)#483, sum(aug_sales#151)#484, sum(sep_sales#152)#485, sum(oct_sales#153)#486, sum(nov_sales#154)#487, sum(dec_sales#155)#488, sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#489, sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#490, sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#491, sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#492, sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#493, sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#494, sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#495, sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#496, sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#497, sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#498, sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#499, sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#500, sum(jan_net#156)#501, sum(feb_net#157)#502, sum(mar_net#158)#503, sum(apr_net#159)#504, sum(may_net#160)#505, sum(jun_net#161)#506, sum(jul_net#162)#507, sum(aug_net#163)#508, sum(sep_net#164)#509, sum(oct_net#165)#510, sum(nov_net#166)#511, sum(dec_net#167)#512] +Results [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, sum(jan_sales#144)#477 AS jan_sales#513, sum(feb_sales#145)#478 AS feb_sales#514, sum(mar_sales#146)#479 AS mar_sales#515, sum(apr_sales#147)#480 AS apr_sales#516, sum(may_sales#148)#481 AS may_sales#517, sum(jun_sales#149)#482 AS jun_sales#518, sum(jul_sales#150)#483 AS jul_sales#519, sum(aug_sales#151)#484 AS aug_sales#520, sum(sep_sales#152)#485 AS sep_sales#521, sum(oct_sales#153)#486 AS oct_sales#522, sum(nov_sales#154)#487 AS nov_sales#523, sum(dec_sales#155)#488 AS dec_sales#524, sum((jan_sales#144 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#489 AS jan_sales_per_sq_foot#525, sum((feb_sales#145 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#490 AS feb_sales_per_sq_foot#526, sum((mar_sales#146 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#491 AS mar_sales_per_sq_foot#527, sum((apr_sales#147 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#492 AS apr_sales_per_sq_foot#528, sum((may_sales#148 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#493 AS may_sales_per_sq_foot#529, sum((jun_sales#149 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#494 AS jun_sales_per_sq_foot#530, sum((jul_sales#150 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#495 AS jul_sales_per_sq_foot#531, sum((aug_sales#151 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#496 AS aug_sales_per_sq_foot#532, sum((sep_sales#152 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#497 AS sep_sales_per_sq_foot#533, sum((oct_sales#153 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#498 AS oct_sales_per_sq_foot#534, sum((nov_sales#154 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#499 AS nov_sales_per_sq_foot#535, sum((dec_sales#155 / cast(w_warehouse_sq_ft#10 as decimal(10,0))))#500 AS dec_sales_per_sq_foot#536, sum(jan_net#156)#501 AS jan_net#537, sum(feb_net#157)#502 AS feb_net#538, sum(mar_net#158)#503 AS mar_net#539, sum(apr_net#159)#504 AS apr_net#540, sum(may_net#160)#505 AS may_net#541, sum(jun_net#161)#506 AS jun_net#542, sum(jul_net#162)#507 AS jul_net#543, sum(aug_net#163)#508 AS aug_net#544, sum(sep_net#164)#509 AS sep_net#545, sum(oct_net#165)#510 AS oct_net#546, sum(nov_net#166)#511 AS nov_net#547, sum(dec_net#167)#512 AS dec_net#548] + +(55) TakeOrderedAndProject +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#513, feb_sales#514, mar_sales#515, apr_sales#516, may_sales#517, jun_sales#518, jul_sales#519, aug_sales#520, sep_sales#521, oct_sales#522, nov_sales#523, dec_sales#524, jan_sales_per_sq_foot#525, feb_sales_per_sq_foot#526, mar_sales_per_sq_foot#527, apr_sales_per_sq_foot#528, may_sales_per_sq_foot#529, jun_sales_per_sq_foot#530, jul_sales_per_sq_foot#531, aug_sales_per_sq_foot#532, sep_sales_per_sq_foot#533, oct_sales_per_sq_foot#534, nov_sales_per_sq_foot#535, dec_sales_per_sq_foot#536, jan_net#537, feb_net#538, mar_net#539, apr_net#540, may_net#541, jun_net#542, jul_net#543, aug_net#544, sep_net#545, oct_net#546, nov_net#547, dec_net#548] +Arguments: 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#142, year#143, jan_sales#513, feb_sales#514, mar_sales#515, apr_sales#516, may_sales#517, jun_sales#518, jul_sales#519, aug_sales#520, sep_sales#521, oct_sales#522, nov_sales#523, dec_sales#524, jan_sales_per_sq_foot#525, feb_sales_per_sq_foot#526, mar_sales_per_sq_foot#527, apr_sales_per_sq_foot#528, may_sales_per_sq_foot#529, jun_sales_per_sq_foot#530, jul_sales_per_sq_foot#531, aug_sales_per_sq_foot#532, sep_sales_per_sq_foot#533, oct_sales_per_sq_foot#534, nov_sales_per_sq_foot#535, dec_sales_per_sq_foot#536, jan_net#537, feb_net#538, mar_net#539, apr_net#540, may_net#541, jun_net#542, jul_net#543, aug_net#544, sep_net#545, oct_net#546, nov_net#547, dec_net#548] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9a2e8adb87 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.native_iceberg_compat/simplified.txt @@ -0,0 +1,83 @@ +TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + WholeStageCodegen (14) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum((jan_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((feb_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((mar_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((apr_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((may_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jun_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((jul_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((aug_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((sep_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((oct_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((nov_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum((dec_sales / cast(w_warehouse_sq_ft as decimal(10,0)))),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 + WholeStageCodegen (13) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_ext_sales_price * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (ws_net_paid * cast(ws_quantity as decimal(10,0))) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 + WholeStageCodegen (5) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,ws_ext_sales_price,ws_quantity,ws_net_paid] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Filter [ws_warehouse_sk,ws_sold_time_sk,ws_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [t_time_sk] + Filter [t_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_time] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [sm_ship_mode_sk] + Filter [sm_carrier,sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_carrier] + WholeStageCodegen (12) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_sales_price * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN (cs_net_paid_inc_tax * cast(cs_quantity as decimal(10,0))) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 + WholeStageCodegen (11) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,cs_sales_price,cs_quantity,cs_net_paid_inc_tax] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [cs_sold_time_sk,t_time_sk] + Project [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Filter [cs_warehouse_sk,cs_sold_time_sk,cs_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,cs_sold_date_sk] + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_moy] #4 + InputAdapter + ReusedExchange [t_time_sk] #5 + InputAdapter + ReusedExchange [sm_ship_mode_sk] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/explain.txt new file mode 100644 index 0000000000..6ceedf2f92 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/explain.txt @@ -0,0 +1,179 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.store (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.item (17) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_id#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(14) BroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] + +(17) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(20) BroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(23) Expand [codegen id : 4] +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] + +(24) HashAggregate [codegen id : 4] +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] + +(25) Exchange +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#31] +Results [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#31 AS sumsales#32] + +(27) Exchange +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#32 DESC NULLS LAST], false, 0 + +(29) Window +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [rank(sumsales#32) windowspecdefinition(i_category#18, sumsales#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#33], [i_category#18], [sumsales#32 DESC NULLS LAST] + +(30) Filter [codegen id : 7] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Condition : (rk#33 <= 100) + +(31) TakeOrderedAndProject +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#32 ASC NULLS FIRST, rk#33 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4205eed4b0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (7) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WholeStageCodegen (6) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] + Expand [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + Project [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6ceedf2f92 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/explain.txt @@ -0,0 +1,179 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.store (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.item (17) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_id#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(14) BroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] + +(17) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(20) BroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(23) Expand [codegen id : 4] +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] + +(24) HashAggregate [codegen id : 4] +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] + +(25) Exchange +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#31] +Results [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#31 AS sumsales#32] + +(27) Exchange +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(28) Sort [codegen id : 6] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#32 DESC NULLS LAST], false, 0 + +(29) Window +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [rank(sumsales#32) windowspecdefinition(i_category#18, sumsales#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#33], [i_category#18], [sumsales#32 DESC NULLS LAST] + +(30) Filter [codegen id : 7] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Condition : (rk#33 <= 100) + +(31) TakeOrderedAndProject +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#32 ASC NULLS FIRST, rk#33 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4205eed4b0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (7) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WholeStageCodegen (6) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] + Expand [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + Project [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/explain.txt new file mode 100644 index 0000000000..6acd6c0e99 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet spark_catalog.default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet spark_catalog.default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet spark_catalog.default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet spark_catalog.default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#9)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] + +(3) Filter [codegen id : 5] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(8) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Input [10]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9, d_date_sk#10] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Fairview) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_city#14] + +(15) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#13] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 4) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#15] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 5] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] + +(25) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#18, ca_city#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#18, ca_city#19] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#18, ca_city#19] +Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#18, ca_city#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#18] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#18, ca_city#19] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] +Aggregate Attributes [3]: [sum#20, sum#21, sum#22] +Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] + +(32) Exchange +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 8] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#26, sum(UnscaledValue(ss_ext_list_price#7))#27, sum(UnscaledValue(ss_ext_tax#8))#28] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#19 AS bought_city#29, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#26,17,2) AS extended_price#30, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#27,17,2) AS list_price#31, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#28,17,2) AS extended_tax#32] + +(34) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#33] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [8]: [ss_ticket_number#5, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#37, ca_city#38] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#37] +Join type: Inner +Join condition: NOT (ca_city#38 = bought_city#29) + +(42) Project [codegen id : 8] +Output [8]: [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] +Input [10]: [ss_ticket_number#5, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#37, ca_city#38] + +(43) TakeOrderedAndProject +Input [8]: [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/simplified.txt new file mode 100644 index 0000000000..84a31f6a2b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [c_last_name,ss_ticket_number,c_first_name,ca_city,bought_city,extended_price,extended_tax,list_price] + WholeStageCodegen (8) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax)),bought_city,extended_price,list_price,extended_tax,sum,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (5) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] [sum,sum,sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6acd6c0e99 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/explain.txt @@ -0,0 +1,248 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet spark_catalog.default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet spark_catalog.default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet spark_catalog.default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet spark_catalog.default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#9)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] + +(3) Filter [codegen id : 5] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(8) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Input [10]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ss_sold_date_sk#9, d_date_sk#10] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Midway,Fairview) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_city#14] + +(15) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#13] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 4) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#15] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 5] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] + +(25) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#18, ca_city#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#18, ca_city#19] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#18, ca_city#19] +Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#18, ca_city#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#18] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#18, ca_city#19] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#19] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] +Aggregate Attributes [3]: [sum#20, sum#21, sum#22] +Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] + +(32) Exchange +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 8] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24, sum#25] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#26, sum(UnscaledValue(ss_ext_list_price#7))#27, sum(UnscaledValue(ss_ext_tax#8))#28] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#19 AS bought_city#29, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#26,17,2) AS extended_price#30, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#27,17,2) AS list_price#31, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#28,17,2) AS extended_tax#32] + +(34) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#33] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 8] +Output [8]: [ss_ticket_number#5, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#37, ca_city#38] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#37] +Join type: Inner +Join condition: NOT (ca_city#38 = bought_city#29) + +(42) Project [codegen id : 8] +Output [8]: [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] +Input [10]: [ss_ticket_number#5, bought_city#29, extended_price#30, list_price#31, extended_tax#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#37, ca_city#38] + +(43) TakeOrderedAndProject +Input [8]: [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#38, bought_city#29, ss_ticket_number#5, extended_price#30, extended_tax#32, list_price#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..84a31f6a2b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [c_last_name,ss_ticket_number,c_first_name,ca_city,bought_city,extended_price,extended_tax,list_price] + WholeStageCodegen (8) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax)),bought_city,extended_price,list_price,extended_tax,sum,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (5) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] [sum,sum,sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/explain.txt new file mode 100644 index 0000000000..fe47588c7c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/explain.txt @@ -0,0 +1,267 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin LeftAnti BuildRight (28) + : : :- * BroadcastHashJoin LeftAnti BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet spark_catalog.default.customer_address (30) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.customer_demographics (37) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(10) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#4] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#11] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#9] +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#9] +Join type: LeftAnti +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#13)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#14] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#12] +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#12] +Join type: LeftAnti +Join condition: None + +(29) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(30) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#15, ca_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [GA,KY,NM]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#15, ca_state#16] + +(32) Filter [codegen id : 7] +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : (ca_state#16 IN (KY,GA,NM) AND isnotnull(ca_address_sk#15)) + +(33) Project [codegen id : 7] +Output [1]: [ca_address_sk#15] +Input [2]: [ca_address_sk#15, ca_state#16] + +(34) BroadcastExchange +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#15] + +(37) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] + +(39) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Condition : isnotnull(cd_demo_sk#17) + +(40) BroadcastExchange +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 9] +Output [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] + +(43) HashAggregate [codegen id : 9] +Input [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Keys [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] + +(44) Exchange +Input [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] +Arguments: hashpartitioning(cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 10] +Input [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] +Keys [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, count(1)#25 AS cnt1#26, cd_purchase_estimate#21, count(1)#25 AS cnt2#27, cd_credit_rating#22, count(1)#25 AS cnt3#28] + +(46) TakeOrderedAndProject +Input [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#26, cd_purchase_estimate#21, cnt2#27, cd_credit_rating#22, cnt3#28] +Arguments: 100, [cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_education_status#20 ASC NULLS FIRST, cd_purchase_estimate#21 ASC NULLS FIRST, cd_credit_rating#22 ASC NULLS FIRST], [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#26, cd_purchase_estimate#21, cnt2#27, cd_credit_rating#22, cnt3#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3ca9dff655 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_datafusion/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cnt1,cnt2,cnt3] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,count] [count(1),cnt1,cnt2,cnt3,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..fe47588c7c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/explain.txt @@ -0,0 +1,267 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin LeftAnti BuildRight (28) + : : :- * BroadcastHashJoin LeftAnti BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet spark_catalog.default.customer_address (30) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.customer_demographics (37) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(10) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#4] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#11] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#9] +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#9] +Join type: LeftAnti +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#13)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#14] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#12] +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#12] +Join type: LeftAnti +Join condition: None + +(29) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(30) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#15, ca_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [GA,KY,NM]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#15, ca_state#16] + +(32) Filter [codegen id : 7] +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : (ca_state#16 IN (KY,GA,NM) AND isnotnull(ca_address_sk#15)) + +(33) Project [codegen id : 7] +Output [1]: [ca_address_sk#15] +Input [2]: [ca_address_sk#15, ca_state#16] + +(34) BroadcastExchange +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#15] + +(37) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] + +(39) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Condition : isnotnull(cd_demo_sk#17) + +(40) BroadcastExchange +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 9] +Output [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] + +(43) HashAggregate [codegen id : 9] +Input [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Keys [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] + +(44) Exchange +Input [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] +Arguments: hashpartitioning(cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 10] +Input [6]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22, count#24] +Keys [5]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cd_purchase_estimate#21, cd_credit_rating#22] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, count(1)#25 AS cnt1#26, cd_purchase_estimate#21, count(1)#25 AS cnt2#27, cd_credit_rating#22, count(1)#25 AS cnt3#28] + +(46) TakeOrderedAndProject +Input [8]: [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#26, cd_purchase_estimate#21, cnt2#27, cd_credit_rating#22, cnt3#28] +Arguments: 100, [cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_education_status#20 ASC NULLS FIRST, cd_purchase_estimate#21 ASC NULLS FIRST, cd_credit_rating#22 ASC NULLS FIRST], [cd_gender#18, cd_marital_status#19, cd_education_status#20, cnt1#26, cd_purchase_estimate#21, cnt2#27, cd_credit_rating#22, cnt3#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3ca9dff655 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.native_iceberg_compat/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cnt1,cnt2,cnt3] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,count] [count(1),cnt1,cnt2,cnt3,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/explain.txt new file mode 100644 index 0000000000..5c2f1ed7e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.promotion (24) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(21) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] + +(24) Scan parquet spark_catalog.default.promotion +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#3] +Right keys [1]: [p_promo_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] + +(32) Exchange +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8410ad41e8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..5c2f1ed7e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.promotion (24) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2000)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) + +(21) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] + +(24) Scan parquet spark_catalog.default.promotion +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#3] +Right keys [1]: [p_promo_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] +Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] + +(32) Exchange +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] +Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8410ad41e8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(ss_quantity),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/explain.txt new file mode 100644 index 0000000000..1cb84c5726 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * Project (45) + +- Window (44) + +- * Sort (43) + +- Exchange (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Expand (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.date_dim (4) + +- BroadcastExchange (35) + +- * BroadcastHashJoin LeftSemi BuildRight (34) + :- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.store (11) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- Window (30) + +- * Sort (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet spark_catalog.default.store_sales (14) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.store (17) + +- ReusedExchange (23) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#1, ss_net_profit#2] +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) Filter [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(14) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(16) Filter [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_state#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(20) BroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#9] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#14] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#10, s_state#13] +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] + +(27) Exchange +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#13, sum#16] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w0#18, s_state#13] + +(29) Sort [codegen id : 5] +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [s_state#13 ASC NULLS FIRST, _w0#18 DESC NULLS LAST], false, 0 + +(30) Window +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [rank(_w0#18) windowspecdefinition(s_state#13, _w0#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w0#18 DESC NULLS LAST] + +(31) Filter [codegen id : 6] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] +Condition : (ranking#19 <= 5) + +(32) Project [codegen id : 6] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] + +(33) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(35) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 8] +Output [3]: [ss_net_profit#2, s_state#8, s_county#7] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(38) Expand [codegen id : 8] +Input [3]: [ss_net_profit#2, s_state#8, s_county#7] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] + +(39) HashAggregate [codegen id : 8] +Input [4]: [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#23] +Results [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] + +(40) Exchange +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Arguments: hashpartitioning(s_state#20, s_county#21, spark_grouping_id#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(41) HashAggregate [codegen id : 9] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#25] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS total_sum#26, s_state#20, s_county#21, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS lochierarchy#27, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS _w0#28, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS _w1#29, CASE WHEN (cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint) = 0) THEN s_state#20 END AS _w2#30] + +(42) Exchange +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: hashpartitioning(_w1#29, _w2#30, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(43) Sort [codegen id : 10] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: [_w1#29 ASC NULLS FIRST, _w2#30 ASC NULLS FIRST, _w0#28 DESC NULLS LAST], false, 0 + +(44) Window +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: [rank(_w0#28) windowspecdefinition(_w1#29, _w2#30, _w0#28 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#29, _w2#30], [_w0#28 DESC NULLS LAST] + +(45) Project [codegen id : 11] +Output [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Input [8]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30, rank_within_parent#31] + +(46) TakeOrderedAndProject +Input [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN s_state#20 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a98d640eee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (11) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (10) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen (8) + HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WholeStageCodegen (5) + Sort [s_state,_w0] + HashAggregate [sum] [sum(UnscaledValue(ss_net_profit)),_w0,s_state,sum] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_net_profit,ss_sold_date_sk,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..1cb84c5726 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/explain.txt @@ -0,0 +1,266 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * Project (45) + +- Window (44) + +- * Sort (43) + +- Exchange (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Expand (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.date_dim (4) + +- BroadcastExchange (35) + +- * BroadcastHashJoin LeftSemi BuildRight (34) + :- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.store (11) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- Window (30) + +- * Sort (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet spark_catalog.default.store_sales (14) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.store (17) + +- ReusedExchange (23) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#1, ss_net_profit#2] +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) Filter [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(14) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(16) Filter [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_state#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(20) BroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#9] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#14] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#10, s_state#13] +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] + +(27) Exchange +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#13, sum#16] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w0#18, s_state#13] + +(29) Sort [codegen id : 5] +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [s_state#13 ASC NULLS FIRST, _w0#18 DESC NULLS LAST], false, 0 + +(30) Window +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [rank(_w0#18) windowspecdefinition(s_state#13, _w0#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w0#18 DESC NULLS LAST] + +(31) Filter [codegen id : 6] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] +Condition : (ranking#19 <= 5) + +(32) Project [codegen id : 6] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] + +(33) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(35) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 8] +Output [3]: [ss_net_profit#2, s_state#8, s_county#7] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(38) Expand [codegen id : 8] +Input [3]: [ss_net_profit#2, s_state#8, s_county#7] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] + +(39) HashAggregate [codegen id : 8] +Input [4]: [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#23] +Results [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] + +(40) Exchange +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Arguments: hashpartitioning(s_state#20, s_county#21, spark_grouping_id#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(41) HashAggregate [codegen id : 9] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#25] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS total_sum#26, s_state#20, s_county#21, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS lochierarchy#27, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS _w0#28, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS _w1#29, CASE WHEN (cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint) = 0) THEN s_state#20 END AS _w2#30] + +(42) Exchange +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: hashpartitioning(_w1#29, _w2#30, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(43) Sort [codegen id : 10] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: [_w1#29 ASC NULLS FIRST, _w2#30 ASC NULLS FIRST, _w0#28 DESC NULLS LAST], false, 0 + +(44) Window +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30] +Arguments: [rank(_w0#28) windowspecdefinition(_w1#29, _w2#30, _w0#28 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#29, _w2#30], [_w0#28 DESC NULLS LAST] + +(45) Project [codegen id : 11] +Output [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Input [8]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w0#28, _w1#29, _w2#30, rank_within_parent#31] + +(46) TakeOrderedAndProject +Input [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN s_state#20 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a98d640eee --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (11) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (10) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen (8) + HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WholeStageCodegen (5) + Sort [s_state,_w0] + HashAggregate [sum] [sum(UnscaledValue(ss_net_profit)),_w0,s_state,sum] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_net_profit,ss_sold_date_sk,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/explain.txt new file mode 100644 index 0000000000..1d8a3199e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/explain.txt @@ -0,0 +1,240 @@ +== Physical Plan == +* Sort (42) ++- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildLeft (29) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.item (1) + : +- Union (28) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.web_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet spark_catalog.default.date_dim (9) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet spark_catalog.default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.store_sales (22) + : +- ReusedExchange (25) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet spark_catalog.default.time_dim (31) + + +(1) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(3) Filter [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(5) BroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(6) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#8)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] + +(8) Filter [codegen id : 3] +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) + +(9) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(11) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [3]: [ws_ext_sales_price#7 AS ext_price#12, ws_item_sk#6 AS sold_item_sk#13, ws_sold_time_sk#5 AS time_sk#14] +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#9] + +(16) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#18)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 5] +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(18) Filter [codegen id : 5] +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) + +(19) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#19] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [3]: [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] +Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] + +(22) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 7] +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(24) Filter [codegen id : 7] +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) + +(25) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#27] + +(26) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] +Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] + +(28) Union + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [sold_item_sk#13] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] + +(31) Scan parquet spark_catalog.default.time_dim +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [Or(EqualTo(t_meal_time,breakfast ),EqualTo(t_meal_time,dinner )), IsNotNull(t_time_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(33) Filter [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) + +(34) Project [codegen id : 8] +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(35) BroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [time_sk#14] +Right keys [1]: [t_time_sk#31] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 9] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] + +(38) HashAggregate [codegen id : 9] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] +Aggregate Attributes [1]: [sum#35] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] + +(39) Exchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 10] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#12))#37] +Results [5]: [i_brand_id#2 AS brand_id#38, i_brand#3 AS brand#39, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#12))#37,17,2) AS ext_price#40] + +(41) Exchange +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] +Arguments: rangepartitioning(ext_price#40 DESC NULLS LAST, brand_id#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(42) Sort [codegen id : 11] +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] +Arguments: [ext_price#40 DESC NULLS LAST, brand_id#38 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/simplified.txt new file mode 100644 index 0000000000..bdc48db383 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_datafusion/simplified.txt @@ -0,0 +1,65 @@ +WholeStageCodegen (11) + Sort [ext_price,brand_id] + InputAdapter + Exchange [ext_price,brand_id] #1 + WholeStageCodegen (10) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum] [sum(UnscaledValue(ext_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen (9) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,ext_price] [sum,sum] + Project [i_brand_id,i_brand,ext_price,t_hour,t_minute] + BroadcastHashJoin [time_sk,t_time_sk] + Project [i_brand_id,i_brand,ext_price,time_sk] + BroadcastHashJoin [i_item_sk,sold_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + InputAdapter + Union + WholeStageCodegen (3) + Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk,ws_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (5) + Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (7) + Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [t_time_sk,t_hour,t_minute] + Filter [t_meal_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..1d8a3199e5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/explain.txt @@ -0,0 +1,240 @@ +== Physical Plan == +* Sort (42) ++- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildLeft (29) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.item (1) + : +- Union (28) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.web_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet spark_catalog.default.date_dim (9) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet spark_catalog.default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.store_sales (22) + : +- ReusedExchange (25) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet spark_catalog.default.time_dim (31) + + +(1) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(3) Filter [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(5) BroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(6) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#8)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] + +(8) Filter [codegen id : 3] +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) + +(9) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(11) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(13) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [3]: [ws_ext_sales_price#7 AS ext_price#12, ws_item_sk#6 AS sold_item_sk#13, ws_sold_time_sk#5 AS time_sk#14] +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#9] + +(16) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#18)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 5] +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] + +(18) Filter [codegen id : 5] +Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) + +(19) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#19] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#18] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [3]: [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] +Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] + +(22) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 7] +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] + +(24) Filter [codegen id : 7] +Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) + +(25) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#27] + +(26) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] +Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] + +(28) Union + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [sold_item_sk#13] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] + +(31) Scan parquet spark_catalog.default.time_dim +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [Or(EqualTo(t_meal_time,breakfast ),EqualTo(t_meal_time,dinner )), IsNotNull(t_time_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(33) Filter [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) + +(34) Project [codegen id : 8] +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(35) BroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [time_sk#14] +Right keys [1]: [t_time_sk#31] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 9] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] + +(38) HashAggregate [codegen id : 9] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] +Aggregate Attributes [1]: [sum#35] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] + +(39) Exchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(40) HashAggregate [codegen id : 10] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#36] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#12))#37] +Results [5]: [i_brand_id#2 AS brand_id#38, i_brand#3 AS brand#39, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#12))#37,17,2) AS ext_price#40] + +(41) Exchange +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] +Arguments: rangepartitioning(ext_price#40 DESC NULLS LAST, brand_id#38 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(42) Sort [codegen id : 11] +Input [5]: [brand_id#38, brand#39, t_hour#32, t_minute#33, ext_price#40] +Arguments: [ext_price#40 DESC NULLS LAST, brand_id#38 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bdc48db383 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.native_iceberg_compat/simplified.txt @@ -0,0 +1,65 @@ +WholeStageCodegen (11) + Sort [ext_price,brand_id] + InputAdapter + Exchange [ext_price,brand_id] #1 + WholeStageCodegen (10) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum] [sum(UnscaledValue(ext_price)),brand_id,brand,ext_price,sum] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen (9) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,ext_price] [sum,sum] + Project [i_brand_id,i_brand,ext_price,t_hour,t_minute] + BroadcastHashJoin [time_sk,t_time_sk] + Project [i_brand_id,i_brand,ext_price,time_sk] + BroadcastHashJoin [i_item_sk,sold_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + InputAdapter + Union + WholeStageCodegen (3) + Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk,ws_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (5) + Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk,cs_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_sold_time_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (7) + Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [t_time_sk,t_hour,t_minute] + Filter [t_meal_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/explain.txt new file mode 100644 index 0000000000..e15d9df434 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (74) ++- * HashAggregate (73) + +- Exchange (72) + +- * HashAggregate (71) + +- * Project (70) + +- * SortMergeJoin LeftOuter (69) + :- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet spark_catalog.default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet spark_catalog.default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet spark_catalog.default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet spark_catalog.default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet spark_catalog.default.date_dim (43) + : : +- BroadcastExchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet spark_catalog.default.promotion (55) + +- * Sort (68) + +- Exchange (67) + +- * Project (66) + +- * Filter (65) + +- * ColumnarToRow (64) + +- Scan parquet spark_catalog.default.catalog_returns (63) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#12)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(7) BroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [inv_item_sk#9] +Join type: Inner +Join condition: (inv_quantity_on_hand#11 < cs_quantity#7) + +(9) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_warehouse_sk#10] +Right keys [1]: [w_warehouse_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] + +(16) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_desc#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#15) + +(19) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_desc#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 10] +Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] + +(22) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = D)) AND isnotnull(cd_demo_sk#17)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#17] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] + +(29) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000 ), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = >10000 )) AND isnotnull(hd_demo_sk#19)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#19] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#19] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] + +(36) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 1999)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(40) BroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] + +(43) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#25, d_week_seq#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [d_week_seq#23, inv_date_sk#12] +Right keys [2]: [d_week_seq#26, d_date_sk#25] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] + +(49) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_date#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(53) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: (d_date#28 > date_add(d_date#22, 5)) + +(54) Project [codegen id : 10] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] + +(55) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#29] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_promo_sk#5] +Right keys [1]: [p_promo_sk#29] +Join type: LeftOuter +Join condition: None + +(60) Project [codegen id : 10] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] + +(61) Exchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(62) Sort [codegen id : 11] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 + +(63) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(65) Filter [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(66) Project [codegen id : 12] +Output [2]: [cr_item_sk#30, cr_order_number#31] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(67) Exchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(68) Sort [codegen id : 13] +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin [codegen id : 14] +Left keys [2]: [cs_item_sk#4, cs_order_number#6] +Right keys [2]: [cr_item_sk#30, cr_order_number#31] +Join type: LeftOuter +Join condition: None + +(70) Project [codegen id : 14] +Output [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] + +(71) HashAggregate [codegen id : 14] +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#33] +Results [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] + +(72) Exchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(73) HashAggregate [codegen id : 15] +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count(1)#35 AS no_promo#36, count(1)#35 AS promo#37, count(1)#35 AS total_cnt#38] + +(74) TakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] +Arguments: 100, [total_cnt#38 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/simplified.txt new file mode 100644 index 0000000000..37b3b330ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_datafusion/simplified.txt @@ -0,0 +1,114 @@ +TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] + WholeStageCodegen (15) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] + InputAdapter + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen (14) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (10) + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + WholeStageCodegen (13) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (12) + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e15d9df434 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (74) ++- * HashAggregate (73) + +- Exchange (72) + +- * HashAggregate (71) + +- * Project (70) + +- * SortMergeJoin LeftOuter (69) + :- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet spark_catalog.default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet spark_catalog.default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet spark_catalog.default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet spark_catalog.default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet spark_catalog.default.date_dim (43) + : : +- BroadcastExchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet spark_catalog.default.promotion (55) + +- * Sort (68) + +- Exchange (67) + +- * Project (66) + +- * Filter (65) + +- * ColumnarToRow (64) + +- Scan parquet spark_catalog.default.catalog_returns (63) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#12)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(7) BroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [inv_item_sk#9] +Join type: Inner +Join condition: (inv_quantity_on_hand#11 < cs_quantity#7) + +(9) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_warehouse_sk#10] +Right keys [1]: [w_warehouse_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] + +(16) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_desc#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#15) + +(19) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_desc#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 10] +Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] + +(22) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = D)) AND isnotnull(cd_demo_sk#17)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#17] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] + +(29) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000 ), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = >10000 )) AND isnotnull(hd_demo_sk#19)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#19] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#19] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] + +(36) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 1999)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(40) BroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] + +(43) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#25, d_week_seq#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [d_week_seq#23, inv_date_sk#12] +Right keys [2]: [d_week_seq#26, d_date_sk#25] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] + +(49) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_date#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(53) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: (d_date#28 > date_add(d_date#22, 5)) + +(54) Project [codegen id : 10] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] + +(55) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#29] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_promo_sk#5] +Right keys [1]: [p_promo_sk#29] +Join type: LeftOuter +Join condition: None + +(60) Project [codegen id : 10] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] + +(61) Exchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(62) Sort [codegen id : 11] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 + +(63) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(65) Filter [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(66) Project [codegen id : 12] +Output [2]: [cr_item_sk#30, cr_order_number#31] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(67) Exchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(68) Sort [codegen id : 13] +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin [codegen id : 14] +Left keys [2]: [cs_item_sk#4, cs_order_number#6] +Right keys [2]: [cr_item_sk#30, cr_order_number#31] +Join type: LeftOuter +Join condition: None + +(70) Project [codegen id : 14] +Output [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] + +(71) HashAggregate [codegen id : 14] +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#33] +Results [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] + +(72) Exchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(73) HashAggregate [codegen id : 15] +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count(1)#35 AS no_promo#36, count(1)#35 AS promo#37, count(1)#35 AS total_cnt#38] + +(74) TakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] +Arguments: 100, [total_cnt#38 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..37b3b330ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.native_iceberg_compat/simplified.txt @@ -0,0 +1,114 @@ +TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] + WholeStageCodegen (15) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] + InputAdapter + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen (14) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (10) + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + WholeStageCodegen (13) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (12) + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/explain.txt new file mode 100644 index 0000000000..37ac7a21de --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.customer (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_county, [Bronx County,Franklin Parish,Orange County,Williamson County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] +Condition : (s_county#10 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_county#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.0) END) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#1, ss_ticket_number#4] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] + +(26) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#17 AS cnt#18] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18] +Condition : ((cnt#18 >= 1) AND (cnt#18 <= 5)) + +(29) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Condition : isnotnull(c_customer_sk#19) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18, c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(35) Exchange +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: rangepartitioning(cnt#18 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: [cnt#18 DESC NULLS LAST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/simplified.txt new file mode 100644 index 0000000000..46f1e68f4a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_datafusion/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [cnt] + InputAdapter + Exchange [cnt] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [cnt] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..37ac7a21de --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.customer (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_county, [Bronx County,Franklin Parish,Orange County,Williamson County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] +Condition : (s_county#10 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_county#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.0) END) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#1, ss_ticket_number#4] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] + +(26) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#17 AS cnt#18] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18] +Condition : ((cnt#18 >= 1) AND (cnt#18 <= 5)) + +(29) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Condition : isnotnull(c_customer_sk#19) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18, c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(35) Exchange +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: rangepartitioning(cnt#18 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: [cnt#18 DESC NULLS LAST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..46f1e68f4a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.native_iceberg_compat/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [cnt] + InputAdapter + Exchange [cnt] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [cnt] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/explain.txt new file mode 100644 index 0000000000..cd5821566e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/explain.txt @@ -0,0 +1,418 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (55) + : +- * BroadcastHashJoin Inner BuildRight (54) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet spark_catalog.default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet spark_catalog.default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (61) + : +- * BroadcastHashJoin Inner BuildRight (60) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.customer (56) + : +- ReusedExchange (59) + +- ReusedExchange (62) + + +(1) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(7) BroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_year#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum#10] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#12] +Results [2]: [c_customer_id#2 AS customer_id#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#12,17,2) AS year_total#14] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#13, year_total#14] +Condition : (isnotnull(year_total#14) AND (year_total#14 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_customer_id#16)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#15] +Right keys [1]: [ss_customer_sk#19] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21] +Input [7]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18, ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] +Condition : (((isnotnull(d_year#23) AND (d_year#23 = 2002)) AND d_year#23 IN (2001,2002)) AND isnotnull(d_date_sk#22)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#22, d_year#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Input [7]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21, d_date_sk#22, d_year#23] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum#24] +Results [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] + +(33) Exchange +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Arguments: hashpartitioning(c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#20))#12] +Results [4]: [c_customer_id#16 AS customer_id#26, c_first_name#17 AS customer_first_name#27, c_last_name#18 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#20))#12,17,2) AS year_total#29] + +(35) BroadcastExchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#26] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Condition : (isnotnull(c_customer_sk#30) AND isnotnull(c_customer_id#31)) + +(40) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#36)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(42) Filter [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Condition : isnotnull(ws_bill_customer_sk#34) + +(43) BroadcastExchange +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#30] +Right keys [1]: [ws_bill_customer_sk#34] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36] +Input [7]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33, ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#37, d_year#38] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#37] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Input [7]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36, d_date_sk#37, d_year#38] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum#39] +Results [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] + +(50) Exchange +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Arguments: hashpartitioning(c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#35))#41] +Results [2]: [c_customer_id#31 AS customer_id#42, MakeDecimal(sum(UnscaledValue(ws_net_paid#35))#41,17,2) AS year_total#43] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#42, year_total#43] +Condition : (isnotnull(year_total#43) AND (year_total#43 > 0.00)) + +(53) BroadcastExchange +Input [2]: [customer_id#42, year_total#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#42] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 16] +Output [7]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43] +Input [8]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#42, year_total#43] + +(56) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] + +(58) Filter [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Condition : (isnotnull(c_customer_sk#44) AND isnotnull(c_customer_id#45)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#44] +Right keys [1]: [ws_bill_customer_sk#48] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50] +Input [7]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47, ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#51, d_year#52] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#50] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Input [7]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] + +(65) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum#53] +Results [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] + +(66) Exchange +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#49))#41] +Results [2]: [c_customer_id#45 AS customer_id#55, MakeDecimal(sum(UnscaledValue(ws_net_paid#49))#41,17,2) AS year_total#56] + +(68) BroadcastExchange +Input [2]: [customer_id#55, year_total#56] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#55] +Join type: Inner +Join condition: (CASE WHEN (year_total#43 > 0.00) THEN (year_total#56 / year_total#43) END > CASE WHEN (year_total#14 > 0.00) THEN (year_total#29 / year_total#14) END) + +(70) Project [codegen id : 16] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43, customer_id#55, year_total#56] + +(71) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7cde4f3773 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_datafusion/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..cd5821566e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/explain.txt @@ -0,0 +1,418 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (55) + : +- * BroadcastHashJoin Inner BuildRight (54) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet spark_catalog.default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet spark_catalog.default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (61) + : +- * BroadcastHashJoin Inner BuildRight (60) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.customer (56) + : +- ReusedExchange (59) + +- ReusedExchange (62) + + +(1) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(7) BroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_year#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum#10] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#12] +Results [2]: [c_customer_id#2 AS customer_id#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#12,17,2) AS year_total#14] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#13, year_total#14] +Condition : (isnotnull(year_total#14) AND (year_total#14 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_customer_id#16)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#15] +Right keys [1]: [ss_customer_sk#19] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21] +Input [7]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18, ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] +Condition : (((isnotnull(d_year#23) AND (d_year#23 = 2002)) AND d_year#23 IN (2001,2002)) AND isnotnull(d_date_sk#22)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#22, d_year#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Input [7]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21, d_date_sk#22, d_year#23] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum#24] +Results [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] + +(33) Exchange +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Arguments: hashpartitioning(c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#20))#12] +Results [4]: [c_customer_id#16 AS customer_id#26, c_first_name#17 AS customer_first_name#27, c_last_name#18 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#20))#12,17,2) AS year_total#29] + +(35) BroadcastExchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#26] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Condition : (isnotnull(c_customer_sk#30) AND isnotnull(c_customer_id#31)) + +(40) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#36)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(42) Filter [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Condition : isnotnull(ws_bill_customer_sk#34) + +(43) BroadcastExchange +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#30] +Right keys [1]: [ws_bill_customer_sk#34] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36] +Input [7]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33, ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#37, d_year#38] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#37] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Input [7]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36, d_date_sk#37, d_year#38] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum#39] +Results [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] + +(50) Exchange +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Arguments: hashpartitioning(c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#35))#41] +Results [2]: [c_customer_id#31 AS customer_id#42, MakeDecimal(sum(UnscaledValue(ws_net_paid#35))#41,17,2) AS year_total#43] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#42, year_total#43] +Condition : (isnotnull(year_total#43) AND (year_total#43 > 0.00)) + +(53) BroadcastExchange +Input [2]: [customer_id#42, year_total#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#42] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 16] +Output [7]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43] +Input [8]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#42, year_total#43] + +(56) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] + +(58) Filter [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Condition : (isnotnull(c_customer_sk#44) AND isnotnull(c_customer_id#45)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#44] +Right keys [1]: [ws_bill_customer_sk#48] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50] +Input [7]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47, ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#51, d_year#52] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#50] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Input [7]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] + +(65) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum#53] +Results [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] + +(66) Exchange +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#49))#41] +Results [2]: [c_customer_id#45 AS customer_id#55, MakeDecimal(sum(UnscaledValue(ws_net_paid#49))#41,17,2) AS year_total#56] + +(68) BroadcastExchange +Input [2]: [customer_id#55, year_total#56] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#55] +Join type: Inner +Join condition: (CASE WHEN (year_total#43 > 0.00) THEN (year_total#56 / year_total#43) END > CASE WHEN (year_total#14 > 0.00) THEN (year_total#29 / year_total#14) END) + +(70) Project [codegen id : 16] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43, customer_id#55, year_total#56] + +(71) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..7cde4f3773 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74.native_iceberg_compat/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/explain.txt new file mode 100644 index 0000000000..d9f01957c9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/explain.txt @@ -0,0 +1,754 @@ +== Physical Plan == +TakeOrderedAndProject (135) ++- * Project (134) + +- * SortMergeJoin Inner (133) + :- * Sort (74) + : +- Exchange (73) + : +- * Filter (72) + : +- * HashAggregate (71) + : +- Exchange (70) + : +- * HashAggregate (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- Union (65) + : :- * Project (26) + : : +- * SortMergeJoin LeftOuter (25) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.date_dim (11) + : : +- * Sort (24) + : : +- Exchange (23) + : : +- * Project (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : :- * Project (45) + : : +- * SortMergeJoin LeftOuter (44) + : : :- * Sort (37) + : : : +- Exchange (36) + : : : +- * Project (35) + : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : :- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Filter (29) + : : : : : +- * ColumnarToRow (28) + : : : : : +- Scan parquet spark_catalog.default.store_sales (27) + : : : : +- ReusedExchange (30) + : : : +- ReusedExchange (33) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet spark_catalog.default.store_returns (38) + : +- * Project (64) + : +- * SortMergeJoin LeftOuter (63) + : :- * Sort (56) + : : +- Exchange (55) + : : +- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet spark_catalog.default.web_sales (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (52) + : +- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * Filter (59) + : +- * ColumnarToRow (58) + : +- Scan parquet spark_catalog.default.web_returns (57) + +- * Sort (132) + +- Exchange (131) + +- * Filter (130) + +- * HashAggregate (129) + +- Exchange (128) + +- * HashAggregate (127) + +- * HashAggregate (126) + +- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (92) + : +- * SortMergeJoin LeftOuter (91) + : :- * Sort (88) + : : +- Exchange (87) + : : +- * Project (86) + : : +- * BroadcastHashJoin Inner BuildRight (85) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (75) + : : : +- ReusedExchange (78) + : : +- BroadcastExchange (84) + : : +- * Filter (83) + : : +- * ColumnarToRow (82) + : : +- Scan parquet spark_catalog.default.date_dim (81) + : +- * Sort (90) + : +- ReusedExchange (89) + :- * Project (107) + : +- * SortMergeJoin LeftOuter (106) + : :- * Sort (103) + : : +- Exchange (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- * Filter (95) + : : : : +- * ColumnarToRow (94) + : : : : +- Scan parquet spark_catalog.default.store_sales (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Sort (105) + : +- ReusedExchange (104) + +- * Project (122) + +- * SortMergeJoin LeftOuter (121) + :- * Sort (118) + : +- Exchange (117) + : +- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Project (113) + : : +- * BroadcastHashJoin Inner BuildRight (112) + : : :- * Filter (110) + : : : +- * ColumnarToRow (109) + : : : +- Scan parquet spark_catalog.default.web_sales (108) + : : +- ReusedExchange (111) + : +- ReusedExchange (114) + +- * Sort (120) + +- ReusedExchange (119) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books ), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] + +(17) Exchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 4] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(21) Filter [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(22) Project [codegen id : 5] +Output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(23) Exchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 6] +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 7] +Left keys [2]: [cs_order_number#2, cs_item_sk#1] +Right keys [2]: [cr_order_number#15, cr_item_sk#14] +Join type: LeftOuter +Join condition: None + +(26) Project [codegen id : 7] +Output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(27) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#25)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] + +(29) Filter [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : isnotnull(ss_item_sk#21) + +(30) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_item_sk#21] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Input [10]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(33) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#31, d_year#32] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#31] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Input [11]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_date_sk#31, d_year#32] + +(36) Exchange +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: hashpartitioning(ss_ticket_number#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(37) Sort [codegen id : 11] +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST], false, 0 + +(38) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(40) Filter [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Condition : (isnotnull(sr_ticket_number#34) AND isnotnull(sr_item_sk#33)) + +(41) Project [codegen id : 12] +Output [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(42) Exchange +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: hashpartitioning(sr_ticket_number#34, sr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(43) Sort [codegen id : 13] +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [sr_ticket_number#34 ASC NULLS FIRST, sr_item_sk#33 ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin [codegen id : 14] +Left keys [2]: [ss_ticket_number#22, ss_item_sk#21] +Right keys [2]: [sr_ticket_number#34, sr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(45) Project [codegen id : 14] +Output [7]: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, (ss_quantity#23 - coalesce(sr_return_quantity#35, 0)) AS sales_cnt#38, (ss_ext_sales_price#24 - coalesce(sr_return_amt#36, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32, sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] + +(46) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#44)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] + +(48) Filter [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_item_sk#40) + +(49) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(50) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#40] +Right keys [1]: [i_item_sk#45] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Input [10]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(52) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#50, d_year#51] + +(53) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#44] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Input [11]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_date_sk#50, d_year#51] + +(55) Exchange +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: hashpartitioning(ws_order_number#41, ws_item_sk#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(56) Sort [codegen id : 18] +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: [ws_order_number#41 ASC NULLS FIRST, ws_item_sk#40 ASC NULLS FIRST], false, 0 + +(57) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(59) Filter [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Condition : (isnotnull(wr_order_number#53) AND isnotnull(wr_item_sk#52)) + +(60) Project [codegen id : 19] +Output [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(61) Exchange +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: hashpartitioning(wr_order_number#53, wr_item_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(62) Sort [codegen id : 20] +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [wr_order_number#53 ASC NULLS FIRST, wr_item_sk#52 ASC NULLS FIRST], false, 0 + +(63) SortMergeJoin [codegen id : 21] +Left keys [2]: [ws_order_number#41, ws_item_sk#40] +Right keys [2]: [wr_order_number#53, wr_item_sk#52] +Join type: LeftOuter +Join condition: None + +(64) Project [codegen id : 21] +Output [7]: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, (ws_quantity#42 - coalesce(wr_return_quantity#54, 0)) AS sales_cnt#57, (ws_ext_sales_price#43 - coalesce(wr_return_amt#55, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51, wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] + +(65) Union + +(66) HashAggregate [codegen id : 22] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(67) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(68) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(69) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#60] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] + +(70) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(71) HashAggregate [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(sales_cnt#19)#63 AS sales_cnt#65, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#66] + +(72) Filter [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Condition : isnotnull(sales_cnt#65) + +(73) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) Sort [codegen id : 25] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(75) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#71)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] + +(77) Filter [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Condition : isnotnull(cs_item_sk#67) + +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(79) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Input [10]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(81) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#77, d_year#78] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] + +(83) Filter [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] +Condition : ((isnotnull(d_year#78) AND (d_year#78 = 2001)) AND isnotnull(d_date_sk#77)) + +(84) BroadcastExchange +Input [2]: [d_date_sk#77, d_year#78] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(85) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_sold_date_sk#71] +Right keys [1]: [d_date_sk#77] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Input [11]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_date_sk#77, d_year#78] + +(87) Exchange +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: hashpartitioning(cs_order_number#68, cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(88) Sort [codegen id : 29] +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: [cs_order_number#68 ASC NULLS FIRST, cs_item_sk#67 ASC NULLS FIRST], false, 0 + +(89) ReusedExchange [Reuses operator id: 23] +Output [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(90) Sort [codegen id : 31] +Input [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] +Arguments: [cr_order_number#80 ASC NULLS FIRST, cr_item_sk#79 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin [codegen id : 32] +Left keys [2]: [cs_order_number#68, cs_item_sk#67] +Right keys [2]: [cr_order_number#80, cr_item_sk#79] +Join type: LeftOuter +Join condition: None + +(92) Project [codegen id : 32] +Output [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, (cs_quantity#69 - coalesce(cr_return_quantity#81, 0)) AS sales_cnt#19, (cs_ext_sales_price#70 - coalesce(cr_return_amount#82, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78, cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(93) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#87)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] + +(95) Filter [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Condition : isnotnull(ss_item_sk#83) + +(96) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(97) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_item_sk#83] +Right keys [1]: [i_item_sk#88] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] +Input [10]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(99) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#93, d_year#94] + +(100) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_sold_date_sk#87] +Right keys [1]: [d_date_sk#93] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Input [11]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_date_sk#93, d_year#94] + +(102) Exchange +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: hashpartitioning(ss_ticket_number#84, ss_item_sk#83, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(103) Sort [codegen id : 36] +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: [ss_ticket_number#84 ASC NULLS FIRST, ss_item_sk#83 ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 42] +Output [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(105) Sort [codegen id : 38] +Input [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] +Arguments: [sr_ticket_number#96 ASC NULLS FIRST, sr_item_sk#95 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin [codegen id : 39] +Left keys [2]: [ss_ticket_number#84, ss_item_sk#83] +Right keys [2]: [sr_ticket_number#96, sr_item_sk#95] +Join type: LeftOuter +Join condition: None + +(107) Project [codegen id : 39] +Output [7]: [d_year#94, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, (ss_quantity#85 - coalesce(sr_return_quantity#97, 0)) AS sales_cnt#38, (ss_ext_sales_price#86 - coalesce(sr_return_amt#98, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94, sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(108) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#103)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(109) ColumnarToRow [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] + +(110) Filter [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Condition : isnotnull(ws_item_sk#99) + +(111) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(112) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#99] +Right keys [1]: [i_item_sk#104] +Join type: Inner +Join condition: None + +(113) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] +Input [10]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(114) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#109, d_year#110] + +(115) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#103] +Right keys [1]: [d_date_sk#109] +Join type: Inner +Join condition: None + +(116) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Input [11]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_date_sk#109, d_year#110] + +(117) Exchange +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: hashpartitioning(ws_order_number#100, ws_item_sk#99, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(118) Sort [codegen id : 43] +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: [ws_order_number#100 ASC NULLS FIRST, ws_item_sk#99 ASC NULLS FIRST], false, 0 + +(119) ReusedExchange [Reuses operator id: 61] +Output [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(120) Sort [codegen id : 45] +Input [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] +Arguments: [wr_order_number#112 ASC NULLS FIRST, wr_item_sk#111 ASC NULLS FIRST], false, 0 + +(121) SortMergeJoin [codegen id : 46] +Left keys [2]: [ws_order_number#100, ws_item_sk#99] +Right keys [2]: [wr_order_number#112, wr_item_sk#111] +Join type: LeftOuter +Join condition: None + +(122) Project [codegen id : 46] +Output [7]: [d_year#110, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, (ws_quantity#101 - coalesce(wr_return_quantity#113, 0)) AS sales_cnt#57, (ws_ext_sales_price#102 - coalesce(wr_return_amt#114, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110, wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(123) Union + +(124) HashAggregate [codegen id : 47] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(125) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(126) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(127) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#115] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] + +(128) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(129) HashAggregate [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum(sales_cnt#19)#63 AS sales_cnt#117, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#118] + +(130) Filter [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Condition : isnotnull(sales_cnt#117) + +(131) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: hashpartitioning(i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(132) Sort [codegen id : 50] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: [i_brand_id#73 ASC NULLS FIRST, i_class_id#74 ASC NULLS FIRST, i_category_id#75 ASC NULLS FIRST, i_manufact_id#76 ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin [codegen id : 51] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Join type: Inner +Join condition: ((cast(sales_cnt#65 as decimal(17,2)) / cast(sales_cnt#117 as decimal(17,2))) < 0.90000000000000000000) + +(134) Project [codegen id : 51] +Output [10]: [d_year#78 AS prev_year#119, d_year#13 AS year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#117 AS prev_yr_cnt#121, sales_cnt#65 AS curr_yr_cnt#122, (sales_cnt#65 - sales_cnt#117) AS sales_cnt_diff#123, (sales_amt#66 - sales_amt#118) AS sales_amt_diff#124] +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66, d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] + +(135) TakeOrderedAndProject +Input [10]: [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] +Arguments: 100, [sales_cnt_diff#123 ASC NULLS FIRST], [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f74420a1ad --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_datafusion/simplified.txt @@ -0,0 +1,232 @@ +TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] + WholeStageCodegen (51) + Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (25) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + WholeStageCodegen (24) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + WholeStageCodegen (23) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + WholeStageCodegen (22) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + WholeStageCodegen (21) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (20) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (50) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (38) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (43) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (45) + Sort [wr_order_number,wr_item_sk] + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d9f01957c9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/explain.txt @@ -0,0 +1,754 @@ +== Physical Plan == +TakeOrderedAndProject (135) ++- * Project (134) + +- * SortMergeJoin Inner (133) + :- * Sort (74) + : +- Exchange (73) + : +- * Filter (72) + : +- * HashAggregate (71) + : +- Exchange (70) + : +- * HashAggregate (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- Union (65) + : :- * Project (26) + : : +- * SortMergeJoin LeftOuter (25) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.date_dim (11) + : : +- * Sort (24) + : : +- Exchange (23) + : : +- * Project (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : :- * Project (45) + : : +- * SortMergeJoin LeftOuter (44) + : : :- * Sort (37) + : : : +- Exchange (36) + : : : +- * Project (35) + : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : :- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Filter (29) + : : : : : +- * ColumnarToRow (28) + : : : : : +- Scan parquet spark_catalog.default.store_sales (27) + : : : : +- ReusedExchange (30) + : : : +- ReusedExchange (33) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet spark_catalog.default.store_returns (38) + : +- * Project (64) + : +- * SortMergeJoin LeftOuter (63) + : :- * Sort (56) + : : +- Exchange (55) + : : +- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet spark_catalog.default.web_sales (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (52) + : +- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * Filter (59) + : +- * ColumnarToRow (58) + : +- Scan parquet spark_catalog.default.web_returns (57) + +- * Sort (132) + +- Exchange (131) + +- * Filter (130) + +- * HashAggregate (129) + +- Exchange (128) + +- * HashAggregate (127) + +- * HashAggregate (126) + +- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (92) + : +- * SortMergeJoin LeftOuter (91) + : :- * Sort (88) + : : +- Exchange (87) + : : +- * Project (86) + : : +- * BroadcastHashJoin Inner BuildRight (85) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (75) + : : : +- ReusedExchange (78) + : : +- BroadcastExchange (84) + : : +- * Filter (83) + : : +- * ColumnarToRow (82) + : : +- Scan parquet spark_catalog.default.date_dim (81) + : +- * Sort (90) + : +- ReusedExchange (89) + :- * Project (107) + : +- * SortMergeJoin LeftOuter (106) + : :- * Sort (103) + : : +- Exchange (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- * Filter (95) + : : : : +- * ColumnarToRow (94) + : : : : +- Scan parquet spark_catalog.default.store_sales (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Sort (105) + : +- ReusedExchange (104) + +- * Project (122) + +- * SortMergeJoin LeftOuter (121) + :- * Sort (118) + : +- Exchange (117) + : +- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Project (113) + : : +- * BroadcastHashJoin Inner BuildRight (112) + : : :- * Filter (110) + : : : +- * ColumnarToRow (109) + : : : +- Scan parquet spark_catalog.default.web_sales (108) + : : +- ReusedExchange (111) + : +- ReusedExchange (114) + +- * Sort (120) + +- ReusedExchange (119) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books ), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] + +(17) Exchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 4] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(21) Filter [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(22) Project [codegen id : 5] +Output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(23) Exchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 6] +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 7] +Left keys [2]: [cs_order_number#2, cs_item_sk#1] +Right keys [2]: [cr_order_number#15, cr_item_sk#14] +Join type: LeftOuter +Join condition: None + +(26) Project [codegen id : 7] +Output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(27) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#25)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] + +(29) Filter [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : isnotnull(ss_item_sk#21) + +(30) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_item_sk#21] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Input [10]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(33) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#31, d_year#32] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#31] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Input [11]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_date_sk#31, d_year#32] + +(36) Exchange +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: hashpartitioning(ss_ticket_number#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(37) Sort [codegen id : 11] +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST], false, 0 + +(38) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(40) Filter [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Condition : (isnotnull(sr_ticket_number#34) AND isnotnull(sr_item_sk#33)) + +(41) Project [codegen id : 12] +Output [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(42) Exchange +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: hashpartitioning(sr_ticket_number#34, sr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(43) Sort [codegen id : 13] +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [sr_ticket_number#34 ASC NULLS FIRST, sr_item_sk#33 ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin [codegen id : 14] +Left keys [2]: [ss_ticket_number#22, ss_item_sk#21] +Right keys [2]: [sr_ticket_number#34, sr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(45) Project [codegen id : 14] +Output [7]: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, (ss_quantity#23 - coalesce(sr_return_quantity#35, 0)) AS sales_cnt#38, (ss_ext_sales_price#24 - coalesce(sr_return_amt#36, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32, sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] + +(46) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#44)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] + +(48) Filter [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_item_sk#40) + +(49) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(50) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#40] +Right keys [1]: [i_item_sk#45] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Input [10]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(52) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#50, d_year#51] + +(53) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#44] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Input [11]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_date_sk#50, d_year#51] + +(55) Exchange +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: hashpartitioning(ws_order_number#41, ws_item_sk#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(56) Sort [codegen id : 18] +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: [ws_order_number#41 ASC NULLS FIRST, ws_item_sk#40 ASC NULLS FIRST], false, 0 + +(57) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(59) Filter [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Condition : (isnotnull(wr_order_number#53) AND isnotnull(wr_item_sk#52)) + +(60) Project [codegen id : 19] +Output [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(61) Exchange +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: hashpartitioning(wr_order_number#53, wr_item_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(62) Sort [codegen id : 20] +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [wr_order_number#53 ASC NULLS FIRST, wr_item_sk#52 ASC NULLS FIRST], false, 0 + +(63) SortMergeJoin [codegen id : 21] +Left keys [2]: [ws_order_number#41, ws_item_sk#40] +Right keys [2]: [wr_order_number#53, wr_item_sk#52] +Join type: LeftOuter +Join condition: None + +(64) Project [codegen id : 21] +Output [7]: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, (ws_quantity#42 - coalesce(wr_return_quantity#54, 0)) AS sales_cnt#57, (ws_ext_sales_price#43 - coalesce(wr_return_amt#55, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51, wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] + +(65) Union + +(66) HashAggregate [codegen id : 22] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(67) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(68) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(69) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#60] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] + +(70) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(71) HashAggregate [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(sales_cnt#19)#63 AS sales_cnt#65, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#66] + +(72) Filter [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Condition : isnotnull(sales_cnt#65) + +(73) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) Sort [codegen id : 25] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(75) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#71)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] + +(77) Filter [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Condition : isnotnull(cs_item_sk#67) + +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(79) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Input [10]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(81) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#77, d_year#78] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] + +(83) Filter [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] +Condition : ((isnotnull(d_year#78) AND (d_year#78 = 2001)) AND isnotnull(d_date_sk#77)) + +(84) BroadcastExchange +Input [2]: [d_date_sk#77, d_year#78] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(85) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_sold_date_sk#71] +Right keys [1]: [d_date_sk#77] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Input [11]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_date_sk#77, d_year#78] + +(87) Exchange +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: hashpartitioning(cs_order_number#68, cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(88) Sort [codegen id : 29] +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: [cs_order_number#68 ASC NULLS FIRST, cs_item_sk#67 ASC NULLS FIRST], false, 0 + +(89) ReusedExchange [Reuses operator id: 23] +Output [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(90) Sort [codegen id : 31] +Input [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] +Arguments: [cr_order_number#80 ASC NULLS FIRST, cr_item_sk#79 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin [codegen id : 32] +Left keys [2]: [cs_order_number#68, cs_item_sk#67] +Right keys [2]: [cr_order_number#80, cr_item_sk#79] +Join type: LeftOuter +Join condition: None + +(92) Project [codegen id : 32] +Output [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, (cs_quantity#69 - coalesce(cr_return_quantity#81, 0)) AS sales_cnt#19, (cs_ext_sales_price#70 - coalesce(cr_return_amount#82, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78, cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(93) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#87)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] + +(95) Filter [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Condition : isnotnull(ss_item_sk#83) + +(96) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(97) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_item_sk#83] +Right keys [1]: [i_item_sk#88] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] +Input [10]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(99) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#93, d_year#94] + +(100) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_sold_date_sk#87] +Right keys [1]: [d_date_sk#93] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Input [11]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_date_sk#93, d_year#94] + +(102) Exchange +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: hashpartitioning(ss_ticket_number#84, ss_item_sk#83, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(103) Sort [codegen id : 36] +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: [ss_ticket_number#84 ASC NULLS FIRST, ss_item_sk#83 ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 42] +Output [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(105) Sort [codegen id : 38] +Input [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] +Arguments: [sr_ticket_number#96 ASC NULLS FIRST, sr_item_sk#95 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin [codegen id : 39] +Left keys [2]: [ss_ticket_number#84, ss_item_sk#83] +Right keys [2]: [sr_ticket_number#96, sr_item_sk#95] +Join type: LeftOuter +Join condition: None + +(107) Project [codegen id : 39] +Output [7]: [d_year#94, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, (ss_quantity#85 - coalesce(sr_return_quantity#97, 0)) AS sales_cnt#38, (ss_ext_sales_price#86 - coalesce(sr_return_amt#98, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94, sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(108) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#103)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(109) ColumnarToRow [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] + +(110) Filter [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Condition : isnotnull(ws_item_sk#99) + +(111) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(112) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#99] +Right keys [1]: [i_item_sk#104] +Join type: Inner +Join condition: None + +(113) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] +Input [10]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(114) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#109, d_year#110] + +(115) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#103] +Right keys [1]: [d_date_sk#109] +Join type: Inner +Join condition: None + +(116) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Input [11]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_date_sk#109, d_year#110] + +(117) Exchange +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: hashpartitioning(ws_order_number#100, ws_item_sk#99, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(118) Sort [codegen id : 43] +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: [ws_order_number#100 ASC NULLS FIRST, ws_item_sk#99 ASC NULLS FIRST], false, 0 + +(119) ReusedExchange [Reuses operator id: 61] +Output [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(120) Sort [codegen id : 45] +Input [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] +Arguments: [wr_order_number#112 ASC NULLS FIRST, wr_item_sk#111 ASC NULLS FIRST], false, 0 + +(121) SortMergeJoin [codegen id : 46] +Left keys [2]: [ws_order_number#100, ws_item_sk#99] +Right keys [2]: [wr_order_number#112, wr_item_sk#111] +Join type: LeftOuter +Join condition: None + +(122) Project [codegen id : 46] +Output [7]: [d_year#110, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, (ws_quantity#101 - coalesce(wr_return_quantity#113, 0)) AS sales_cnt#57, (ws_ext_sales_price#102 - coalesce(wr_return_amt#114, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110, wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(123) Union + +(124) HashAggregate [codegen id : 47] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(125) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(126) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(127) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#115] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] + +(128) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(129) HashAggregate [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum(sales_cnt#19)#63 AS sales_cnt#117, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#118] + +(130) Filter [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Condition : isnotnull(sales_cnt#117) + +(131) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: hashpartitioning(i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(132) Sort [codegen id : 50] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: [i_brand_id#73 ASC NULLS FIRST, i_class_id#74 ASC NULLS FIRST, i_category_id#75 ASC NULLS FIRST, i_manufact_id#76 ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin [codegen id : 51] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Join type: Inner +Join condition: ((cast(sales_cnt#65 as decimal(17,2)) / cast(sales_cnt#117 as decimal(17,2))) < 0.90000000000000000000) + +(134) Project [codegen id : 51] +Output [10]: [d_year#78 AS prev_year#119, d_year#13 AS year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#117 AS prev_yr_cnt#121, sales_cnt#65 AS curr_yr_cnt#122, (sales_cnt#65 - sales_cnt#117) AS sales_cnt_diff#123, (sales_amt#66 - sales_amt#118) AS sales_amt_diff#124] +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66, d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] + +(135) TakeOrderedAndProject +Input [10]: [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] +Arguments: 100, [sales_cnt_diff#123 ASC NULLS FIRST], [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f74420a1ad --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.native_iceberg_compat/simplified.txt @@ -0,0 +1,232 @@ +TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] + WholeStageCodegen (51) + Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (25) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + WholeStageCodegen (24) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + WholeStageCodegen (23) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + WholeStageCodegen (22) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + WholeStageCodegen (21) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (20) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (50) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (38) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (43) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (45) + Sort [wr_order_number,wr_item_sk] + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/explain.txt new file mode 100644 index 0000000000..68694e2373 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +TakeOrderedAndProject (38) ++- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- Union (34) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.item (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.date_dim (10) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet spark_catalog.default.web_sales (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet spark_catalog.default.catalog_sales (25) + : +- ReusedExchange (28) + +- ReusedExchange (31) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#5, i_category#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_item_sk#5, i_category#6] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : isnotnull(d_date_sk#7) + +(13) BroadcastExchange +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#7] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [6]: [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#8, d_qoy#9, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#12] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#7, d_year#8, d_qoy#9] + +(16) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#16)] +PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 6] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] + +(18) Filter [codegen id : 6] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) + +(19) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#17, i_category#18] + +(20) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#13] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 6] +Output [4]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] +Input [6]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_item_sk#17, i_category#18] + +(22) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#19, d_year#20, d_qoy#21] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#16] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 6] +Output [6]: [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#20, d_qoy#21, i_category#18, ws_ext_sales_price#15 AS ext_sales_price#24] +Input [7]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18, d_date_sk#19, d_year#20, d_qoy#21] + +(25) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#28)] +PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] + +(27) Filter [codegen id : 9] +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) + +(28) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#29, i_category#30] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#26] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] +Input [6]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, i_item_sk#29, i_category#30] + +(31) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#31, d_year#32, d_qoy#33] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#28] +Right keys [1]: [d_date_sk#31] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#32, d_qoy#33, i_category#30, cs_ext_sales_price#27 AS ext_sales_price#36] +Input [7]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30, d_date_sk#31, d_year#32, d_qoy#33] + +(34) Union + +(35) HashAggregate [codegen id : 10] +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count#37, sum#38] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] + +(36) Exchange +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(37) HashAggregate [codegen id : 11] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count(1)#41, sum(UnscaledValue(ext_sales_price#12))#42] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count(1)#41 AS sales_cnt#43, MakeDecimal(sum(UnscaledValue(ext_sales_price#12))#42,17,2) AS sales_amt#44] + +(38) TakeOrderedAndProject +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] +Arguments: 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#9 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2d208fdbb2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_datafusion/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] + WholeStageCodegen (11) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum] [count(1),sum(UnscaledValue(ext_sales_price)),sales_cnt,sales_amt,count,sum] + InputAdapter + Exchange [channel,col_name,d_year,d_qoy,i_category] #1 + WholeStageCodegen (10) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] [count,sum,count,sum] + InputAdapter + Union + WholeStageCodegen (3) + Project [ss_store_sk,d_year,d_qoy,i_category,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + WholeStageCodegen (6) + Project [ws_ship_customer_sk,d_year,d_qoy,i_category,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_ship_customer_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + WholeStageCodegen (9) + Project [cs_ship_addr_sk,d_year,d_qoy,i_category,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_ship_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..68694e2373 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/explain.txt @@ -0,0 +1,218 @@ +== Physical Plan == +TakeOrderedAndProject (38) ++- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- Union (34) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.item (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.date_dim (10) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet spark_catalog.default.web_sales (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet spark_catalog.default.catalog_sales (25) + : +- ReusedExchange (28) + +- ReusedExchange (31) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4] +Condition : (isnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#5, i_category#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6] +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_item_sk#5, i_category#6] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : isnotnull(d_date_sk#7) + +(13) BroadcastExchange +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#7] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [6]: [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#8, d_qoy#9, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#12] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#7, d_year#8, d_qoy#9] + +(16) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#16)] +PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 6] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] + +(18) Filter [codegen id : 6] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) + +(19) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#17, i_category#18] + +(20) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#13] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 6] +Output [4]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] +Input [6]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_item_sk#17, i_category#18] + +(22) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#19, d_year#20, d_qoy#21] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#16] +Right keys [1]: [d_date_sk#19] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 6] +Output [6]: [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#20, d_qoy#21, i_category#18, ws_ext_sales_price#15 AS ext_sales_price#24] +Input [7]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18, d_date_sk#19, d_year#20, d_qoy#21] + +(25) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#28)] +PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] + +(27) Filter [codegen id : 9] +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) + +(28) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#29, i_category#30] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#26] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] +Input [6]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, i_item_sk#29, i_category#30] + +(31) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#31, d_year#32, d_qoy#33] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#28] +Right keys [1]: [d_date_sk#31] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#32, d_qoy#33, i_category#30, cs_ext_sales_price#27 AS ext_sales_price#36] +Input [7]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30, d_date_sk#31, d_year#32, d_qoy#33] + +(34) Union + +(35) HashAggregate [codegen id : 10] +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count#37, sum#38] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] + +(36) Exchange +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(37) HashAggregate [codegen id : 11] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count(1)#41, sum(UnscaledValue(ext_sales_price#12))#42] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count(1)#41 AS sales_cnt#43, MakeDecimal(sum(UnscaledValue(ext_sales_price#12))#42,17,2) AS sales_amt#44] + +(38) TakeOrderedAndProject +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] +Arguments: 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#9 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2d208fdbb2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.native_iceberg_compat/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] + WholeStageCodegen (11) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum] [count(1),sum(UnscaledValue(ext_sales_price)),sales_cnt,sales_amt,count,sum] + InputAdapter + Exchange [channel,col_name,d_year,d_qoy,i_category] #1 + WholeStageCodegen (10) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] [count,sum,count,sum] + InputAdapter + Union + WholeStageCodegen (3) + Project [ss_store_sk,d_year,d_qoy,i_category,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_sold_date_sk,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + WholeStageCodegen (6) + Project [ws_ship_customer_sk,d_year,d_qoy,i_category,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_ship_customer_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + WholeStageCodegen (9) + Project [cs_ship_addr_sk,d_year,d_qoy,i_category,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ship_addr_sk,cs_ext_sales_price,cs_sold_date_sk,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_ship_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/explain.txt new file mode 100644 index 0000000000..6d88eef211 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/explain.txt @@ -0,0 +1,527 @@ +== Physical Plan == +TakeOrderedAndProject (89) ++- * HashAggregate (88) + +- Exchange (87) + +- * HashAggregate (86) + +- * Expand (85) + +- Union (84) + :- * Project (34) + : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.store_returns (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + :- * Project (53) + : +- * BroadcastNestedLoopJoin Inner BuildLeft (52) + : :- BroadcastExchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * ColumnarToRow (36) + : : : +- Scan parquet spark_catalog.default.catalog_sales (35) + : : +- ReusedExchange (37) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * ColumnarToRow (45) + : : +- Scan parquet spark_catalog.default.catalog_returns (44) + : +- ReusedExchange (46) + +- * Project (83) + +- * BroadcastHashJoin LeftOuter BuildRight (82) + :- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- * Project (65) + : +- * BroadcastHashJoin Inner BuildRight (64) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : +- ReusedExchange (57) + : +- BroadcastExchange (63) + : +- * Filter (62) + : +- * ColumnarToRow (61) + : +- Scan parquet spark_catalog.default.web_page (60) + +- BroadcastExchange (81) + +- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- * Project (77) + +- * BroadcastHashJoin Inner BuildRight (76) + :- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * Filter (71) + : : +- * ColumnarToRow (70) + : : +- Scan parquet spark_catalog.default.web_returns (69) + : +- ReusedExchange (72) + +- ReusedExchange (75) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 2000-08-03)) AND (d_date#6 <= 2000-09-02)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#7] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(14) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] + +(18) Exchange +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] + +(20) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#19)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] + +(22) Filter [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#20] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] + +(26) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#21] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] + +(30) Exchange +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] + +(32) BroadcastExchange +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#7] +Right keys [1]: [s_store_sk#21] +Join type: LeftOuter +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [sales#14, coalesce(returns#28, 0.00) AS returns#30, (profit#15 - coalesce(profit_loss#29, 0.00)) AS profit#31, store channel AS channel#32, s_store_sk#7 AS id#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] + +(35) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#37)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] + +(37) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#38] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] + +(40) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] + +(41) Exchange +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(42) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] + +(43) BroadcastExchange +Input [3]: [cs_call_center_sk#34, sales#45, profit#46] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(44) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#49)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 13] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] + +(46) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#50] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 13] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] + +(49) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#47, cr_net_loss#48] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] + +(50) Exchange +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate +Input [2]: [sum#53, sum#54] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] + +(52) BroadcastNestedLoopJoin [codegen id : 14] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 14] +Output [5]: [sales#45, returns#57, (profit#46 - profit_loss#58) AS profit#59, catalog channel AS channel#60, cs_call_center_sk#34 AS id#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#65)] +PushedFilters: [IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] + +(56) Filter [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) + +(57) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#66] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] + +(60) Scan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [1]: [wp_web_page_sk#67] + +(62) Filter [codegen id : 16] +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) + +(63) BroadcastExchange +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(64) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 17] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] + +(66) HashAggregate [codegen id : 17] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] + +(67) Exchange +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] + +(69) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#79)] +PushedFilters: [IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] + +(71) Filter [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) + +(72) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#80] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] +Join type: Inner +Join condition: None + +(74) Project [codegen id : 20] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] + +(75) ReusedExchange [Reuses operator id: 63] +Output [1]: [wp_web_page_sk#81] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] +Join type: Inner +Join condition: None + +(77) Project [codegen id : 20] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] + +(78) HashAggregate [codegen id : 20] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] + +(79) Exchange +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] + +(81) BroadcastExchange +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(82) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] +Join type: LeftOuter +Join condition: None + +(83) Project [codegen id : 22] +Output [5]: [sales#74, coalesce(returns#88, 0.00) AS returns#90, (profit#75 - coalesce(profit_loss#89, 0.00)) AS profit#91, web channel AS channel#92, wp_web_page_sk#67 AS id#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] + +(84) Union + +(85) Expand [codegen id : 23] +Input [5]: [sales#14, returns#30, profit#31, channel#32, id#33] +Arguments: [[sales#14, returns#30, profit#31, channel#32, id#33, 0], [sales#14, returns#30, profit#31, channel#32, null, 1], [sales#14, returns#30, profit#31, null, null, 3]], [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] + +(86) HashAggregate [codegen id : 23] +Input [6]: [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#30), partial_sum(profit#31)] +Aggregate Attributes [6]: [sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Results [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] + +(87) Exchange +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Arguments: hashpartitioning(channel#94, id#95, spark_grouping_id#96, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(88) HashAggregate [codegen id : 24] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [sum(sales#14), sum(returns#30), sum(profit#31)] +Aggregate Attributes [3]: [sum(sales#14)#109, sum(returns#30)#110, sum(profit#31)#111] +Results [5]: [channel#94, id#95, sum(sales#14)#109 AS sales#112, sum(returns#30)#110 AS returns#113, sum(profit#31)#111 AS profit#114] + +(89) TakeOrderedAndProject +Input [5]: [channel#94, id#95, sales#112, returns#113, profit#114] +Arguments: 100, [channel#94 ASC NULLS FIRST, id#95 ASC NULLS FIRST], [channel#94, id#95, sales#112, returns#113, profit#114] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b813daff14 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_datafusion/simplified.txt @@ -0,0 +1,136 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (24) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (23) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (8) + Project [sales,returns,profit,profit_loss,s_store_sk] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #6 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #4 + WholeStageCodegen (14) + Project [sales,returns,profit,profit_loss,cs_call_center_sk] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #8 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (22) + Project [sales,returns,profit,profit_loss,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #10 + WholeStageCodegen (17) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (16) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #13 + WholeStageCodegen (20) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [wp_web_page_sk] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..6d88eef211 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/explain.txt @@ -0,0 +1,527 @@ +== Physical Plan == +TakeOrderedAndProject (89) ++- * HashAggregate (88) + +- Exchange (87) + +- * HashAggregate (86) + +- * Expand (85) + +- Union (84) + :- * Project (34) + : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.store_returns (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + :- * Project (53) + : +- * BroadcastNestedLoopJoin Inner BuildLeft (52) + : :- BroadcastExchange (43) + : : +- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * ColumnarToRow (36) + : : : +- Scan parquet spark_catalog.default.catalog_sales (35) + : : +- ReusedExchange (37) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * ColumnarToRow (45) + : : +- Scan parquet spark_catalog.default.catalog_returns (44) + : +- ReusedExchange (46) + +- * Project (83) + +- * BroadcastHashJoin LeftOuter BuildRight (82) + :- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- * Project (65) + : +- * BroadcastHashJoin Inner BuildRight (64) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : +- ReusedExchange (57) + : +- BroadcastExchange (63) + : +- * Filter (62) + : +- * ColumnarToRow (61) + : +- Scan parquet spark_catalog.default.web_page (60) + +- BroadcastExchange (81) + +- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- * Project (77) + +- * BroadcastHashJoin Inner BuildRight (76) + :- * Project (74) + : +- * BroadcastHashJoin Inner BuildRight (73) + : :- * Filter (71) + : : +- * ColumnarToRow (70) + : : +- Scan parquet spark_catalog.default.web_returns (69) + : +- ReusedExchange (72) + +- ReusedExchange (75) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 2000-08-03)) AND (d_date#6 <= 2000-09-02)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#7] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(14) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] + +(18) Exchange +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] + +(20) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#19)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] + +(22) Filter [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#20] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] + +(26) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#21] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] + +(30) Exchange +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] + +(32) BroadcastExchange +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#7] +Right keys [1]: [s_store_sk#21] +Join type: LeftOuter +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [sales#14, coalesce(returns#28, 0.00) AS returns#30, (profit#15 - coalesce(profit_loss#29, 0.00)) AS profit#31, store channel AS channel#32, s_store_sk#7 AS id#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] + +(35) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#37)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] + +(37) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#38] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] + +(40) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] + +(41) Exchange +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(42) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] + +(43) BroadcastExchange +Input [3]: [cs_call_center_sk#34, sales#45, profit#46] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(44) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#49)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 13] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] + +(46) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#50] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 13] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] + +(49) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#47, cr_net_loss#48] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] + +(50) Exchange +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate +Input [2]: [sum#53, sum#54] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] + +(52) BroadcastNestedLoopJoin [codegen id : 14] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 14] +Output [5]: [sales#45, returns#57, (profit#46 - profit_loss#58) AS profit#59, catalog channel AS channel#60, cs_call_center_sk#34 AS id#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#65)] +PushedFilters: [IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] + +(56) Filter [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) + +(57) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#66] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] + +(60) Scan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [1]: [wp_web_page_sk#67] + +(62) Filter [codegen id : 16] +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) + +(63) BroadcastExchange +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(64) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 17] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] + +(66) HashAggregate [codegen id : 17] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] + +(67) Exchange +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] + +(69) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#79)] +PushedFilters: [IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] + +(71) Filter [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) + +(72) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#80] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] +Join type: Inner +Join condition: None + +(74) Project [codegen id : 20] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] + +(75) ReusedExchange [Reuses operator id: 63] +Output [1]: [wp_web_page_sk#81] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] +Join type: Inner +Join condition: None + +(77) Project [codegen id : 20] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] + +(78) HashAggregate [codegen id : 20] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] + +(79) Exchange +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] + +(81) BroadcastExchange +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(82) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] +Join type: LeftOuter +Join condition: None + +(83) Project [codegen id : 22] +Output [5]: [sales#74, coalesce(returns#88, 0.00) AS returns#90, (profit#75 - coalesce(profit_loss#89, 0.00)) AS profit#91, web channel AS channel#92, wp_web_page_sk#67 AS id#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] + +(84) Union + +(85) Expand [codegen id : 23] +Input [5]: [sales#14, returns#30, profit#31, channel#32, id#33] +Arguments: [[sales#14, returns#30, profit#31, channel#32, id#33, 0], [sales#14, returns#30, profit#31, channel#32, null, 1], [sales#14, returns#30, profit#31, null, null, 3]], [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] + +(86) HashAggregate [codegen id : 23] +Input [6]: [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#30), partial_sum(profit#31)] +Aggregate Attributes [6]: [sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Results [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] + +(87) Exchange +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Arguments: hashpartitioning(channel#94, id#95, spark_grouping_id#96, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(88) HashAggregate [codegen id : 24] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [sum(sales#14), sum(returns#30), sum(profit#31)] +Aggregate Attributes [3]: [sum(sales#14)#109, sum(returns#30)#110, sum(profit#31)#111] +Results [5]: [channel#94, id#95, sum(sales#14)#109 AS sales#112, sum(returns#30)#110 AS returns#113, sum(profit#31)#111 AS profit#114] + +(89) TakeOrderedAndProject +Input [5]: [channel#94, id#95, sales#112, returns#113, profit#114] +Arguments: 100, [channel#94 ASC NULLS FIRST, id#95 ASC NULLS FIRST], [channel#94, id#95, sales#112, returns#113, profit#114] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..b813daff14 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.native_iceberg_compat/simplified.txt @@ -0,0 +1,136 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (24) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (23) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (8) + Project [sales,returns,profit,profit_loss,s_store_sk] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #6 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #4 + WholeStageCodegen (14) + Project [sales,returns,profit,profit_loss,cs_call_center_sk] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #8 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (22) + Project [sales,returns,profit,profit_loss,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #10 + WholeStageCodegen (17) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (16) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #13 + WholeStageCodegen (20) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [wp_web_page_sk] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/explain.txt new file mode 100644 index 0000000000..430c1c511e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/explain.txt @@ -0,0 +1,417 @@ +== Physical Plan == +TakeOrderedAndProject (73) ++- * Project (72) + +- * SortMergeJoin Inner (71) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- * SortMergeJoin LeftOuter (12) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet spark_catalog.default.date_dim (15) + : +- * Sort (46) + : +- * Filter (45) + : +- * HashAggregate (44) + : +- Exchange (43) + : +- * HashAggregate (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Project (38) + : : +- * Filter (37) + : : +- * SortMergeJoin LeftOuter (36) + : : :- * Sort (29) + : : : +- Exchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet spark_catalog.default.web_sales (25) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.web_returns (30) + : +- ReusedExchange (39) + +- * Sort (70) + +- * Filter (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * Filter (61) + : +- * SortMergeJoin LeftOuter (60) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.catalog_sales (49) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * ColumnarToRow (55) + : +- Scan parquet spark_catalog.default.catalog_returns (54) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(8) Filter [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(9) Project [codegen id : 3] +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(10) Exchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 6] +Left keys [2]: [ss_ticket_number#3, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join type: LeftOuter +Join condition: None + +(13) Filter [codegen id : 6] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(14) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] + +(15) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] + +(17) Filter [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(18) BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] + +(21) HashAggregate [codegen id : 6] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum#13, sum#14, sum#15] +Results [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] + +(22) Exchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(23) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum(ss_quantity#4)#19, sum(UnscaledValue(ss_wholesale_cost#5))#20, sum(UnscaledValue(ss_sales_price#6))#21] +Results [6]: [d_year#12 AS ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#19 AS ss_qty#23, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#20,17,2) AS ss_wc#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#21,17,2) AS ss_sp#25] + +(24) Sort [codegen id : 7] +Input [6]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(25) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#32)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] + +(27) Filter [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_item_sk#26) AND isnotnull(ws_bill_customer_sk#27)) + +(28) Exchange +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: hashpartitioning(ws_order_number#28, ws_item_sk#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 9] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: [ws_order_number#28 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(32) Filter [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Condition : (isnotnull(wr_order_number#34) AND isnotnull(wr_item_sk#33)) + +(33) Project [codegen id : 10] +Output [2]: [wr_item_sk#33, wr_order_number#34] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(34) Exchange +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: hashpartitioning(wr_order_number#34, wr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 11] +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: [wr_order_number#34 ASC NULLS FIRST, wr_item_sk#33 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 13] +Left keys [2]: [ws_order_number#28, ws_item_sk#26] +Right keys [2]: [wr_order_number#34, wr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(37) Filter [codegen id : 13] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] +Condition : isnull(wr_order_number#34) + +(38) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] + +(39) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#36, d_year#37] + +(40) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Input [8]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, d_date_sk#36, d_year#37] + +(42) HashAggregate [codegen id : 13] +Input [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [partial_sum(ws_quantity#29), partial_sum(UnscaledValue(ws_wholesale_cost#30)), partial_sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] + +(43) Exchange +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(44) HashAggregate [codegen id : 14] +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [sum(ws_quantity#29), sum(UnscaledValue(ws_wholesale_cost#30)), sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum(ws_quantity#29)#44, sum(UnscaledValue(ws_wholesale_cost#30))#45, sum(UnscaledValue(ws_sales_price#31))#46] +Results [6]: [d_year#37 AS ws_sold_year#47, ws_item_sk#26, ws_bill_customer_sk#27 AS ws_customer_sk#48, sum(ws_quantity#29)#44 AS ws_qty#49, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#30))#45,17,2) AS ws_wc#50, MakeDecimal(sum(UnscaledValue(ws_sales_price#31))#46,17,2) AS ws_sp#51] + +(45) Filter [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Condition : (coalesce(ws_qty#49, 0) > 0) + +(46) Sort [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Arguments: [ws_sold_year#47 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST, ws_customer_sk#48 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 15] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 15] +Output [9]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51] +Input [12]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] + +(49) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#58)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] + +(51) Filter [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Condition : (isnotnull(cs_item_sk#53) AND isnotnull(cs_bill_customer_sk#52)) + +(52) Exchange +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: hashpartitioning(cs_order_number#54, cs_item_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(53) Sort [codegen id : 17] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: [cs_order_number#54 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST], false, 0 + +(54) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(56) Filter [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Condition : (isnotnull(cr_order_number#60) AND isnotnull(cr_item_sk#59)) + +(57) Project [codegen id : 18] +Output [2]: [cr_item_sk#59, cr_order_number#60] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(58) Exchange +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: hashpartitioning(cr_order_number#60, cr_item_sk#59, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(59) Sort [codegen id : 19] +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: [cr_order_number#60 ASC NULLS FIRST, cr_item_sk#59 ASC NULLS FIRST], false, 0 + +(60) SortMergeJoin [codegen id : 21] +Left keys [2]: [cs_order_number#54, cs_item_sk#53] +Right keys [2]: [cr_order_number#60, cr_item_sk#59] +Join type: LeftOuter +Join condition: None + +(61) Filter [codegen id : 21] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] +Condition : isnull(cr_order_number#60) + +(62) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] + +(63) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#62, d_year#63] + +(64) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [cs_sold_date_sk#58] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Input [8]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, d_date_sk#62, d_year#63] + +(66) HashAggregate [codegen id : 21] +Input [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [partial_sum(cs_quantity#55), partial_sum(UnscaledValue(cs_wholesale_cost#56)), partial_sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum#64, sum#65, sum#66] +Results [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] + +(67) Exchange +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [sum(cs_quantity#55), sum(UnscaledValue(cs_wholesale_cost#56)), sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum(cs_quantity#55)#70, sum(UnscaledValue(cs_wholesale_cost#56))#71, sum(UnscaledValue(cs_sales_price#57))#72] +Results [6]: [d_year#63 AS cs_sold_year#73, cs_item_sk#53, cs_bill_customer_sk#52 AS cs_customer_sk#74, sum(cs_quantity#55)#70 AS cs_qty#75, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#56))#71,17,2) AS cs_wc#76, MakeDecimal(sum(UnscaledValue(cs_sales_price#57))#72,17,2) AS cs_sp#77] + +(69) Filter [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Condition : (coalesce(cs_qty#75, 0) > 0) + +(70) Sort [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Arguments: [cs_sold_year#73 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST, cs_customer_sk#74 ASC NULLS FIRST], false, 0 + +(71) SortMergeJoin [codegen id : 23] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74] +Join type: Inner +Join condition: None + +(72) Project [codegen id : 23] +Output [12]: [round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#49 + cs_qty#75), 1) as double)), 2) AS ratio#78, ss_qty#23 AS store_qty#79, ss_wc#24 AS store_wholesale_cost#80, ss_sp#25 AS store_sales_price#81, (coalesce(ws_qty#49, 0) + coalesce(cs_qty#75, 0)) AS other_chan_qty#82, (coalesce(ws_wc#50, 0.00) + coalesce(cs_wc#76, 0.00)) AS other_chan_wholesale_cost#83, (coalesce(ws_sp#51, 0.00) + coalesce(cs_sp#77, 0.00)) AS other_chan_sales_price#84, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, cs_qty#75] +Input [15]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51, cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] + +(73) TakeOrderedAndProject +Input [12]: [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, cs_qty#75] +Arguments: 100, [ratio#78 ASC NULLS FIRST, ss_qty#23 DESC NULLS LAST, ss_wc#24 DESC NULLS LAST, ss_sp#25 DESC NULLS LAST, other_chan_qty#82 ASC NULLS FIRST, other_chan_wholesale_cost#83 ASC NULLS FIRST, other_chan_sales_price#84 ASC NULLS FIRST, round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#49 + cs_qty#75), 1) as double)), 2) ASC NULLS FIRST], [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/simplified.txt new file mode 100644 index 0000000000..839bcca08a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_datafusion/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (23) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] + InputAdapter + WholeStageCodegen (15) + Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_sold_year,ss_item_sk,ss_customer_sk] + HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + Filter [sr_ticket_number] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #3 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [ws_sold_year,ws_item_sk,ws_customer_sk] + Filter [ws_qty] + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen (13) + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + Filter [wr_order_number] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #6 + WholeStageCodegen (8) + Filter [ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (11) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #7 + WholeStageCodegen (10) + Project [wr_item_sk,wr_order_number] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [cs_sold_year,cs_item_sk,cs_customer_sk] + Filter [cs_qty] + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen (21) + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + Filter [cr_order_number] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (17) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #9 + WholeStageCodegen (16) + Filter [cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (19) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #10 + WholeStageCodegen (18) + Project [cr_item_sk,cr_order_number] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..430c1c511e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/explain.txt @@ -0,0 +1,417 @@ +== Physical Plan == +TakeOrderedAndProject (73) ++- * Project (72) + +- * SortMergeJoin Inner (71) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- * SortMergeJoin LeftOuter (12) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet spark_catalog.default.date_dim (15) + : +- * Sort (46) + : +- * Filter (45) + : +- * HashAggregate (44) + : +- Exchange (43) + : +- * HashAggregate (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Project (38) + : : +- * Filter (37) + : : +- * SortMergeJoin LeftOuter (36) + : : :- * Sort (29) + : : : +- Exchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet spark_catalog.default.web_sales (25) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.web_returns (30) + : +- ReusedExchange (39) + +- * Sort (70) + +- * Filter (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * Filter (61) + : +- * SortMergeJoin LeftOuter (60) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.catalog_sales (49) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * ColumnarToRow (55) + : +- Scan parquet spark_catalog.default.catalog_returns (54) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(8) Filter [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(9) Project [codegen id : 3] +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(10) Exchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 6] +Left keys [2]: [ss_ticket_number#3, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join type: LeftOuter +Join condition: None + +(13) Filter [codegen id : 6] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(14) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] + +(15) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] + +(17) Filter [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(18) BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] + +(21) HashAggregate [codegen id : 6] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum#13, sum#14, sum#15] +Results [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] + +(22) Exchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(23) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum(ss_quantity#4)#19, sum(UnscaledValue(ss_wholesale_cost#5))#20, sum(UnscaledValue(ss_sales_price#6))#21] +Results [6]: [d_year#12 AS ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#19 AS ss_qty#23, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#20,17,2) AS ss_wc#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#21,17,2) AS ss_sp#25] + +(24) Sort [codegen id : 7] +Input [6]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(25) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#32)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] + +(27) Filter [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_item_sk#26) AND isnotnull(ws_bill_customer_sk#27)) + +(28) Exchange +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: hashpartitioning(ws_order_number#28, ws_item_sk#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 9] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: [ws_order_number#28 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(32) Filter [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Condition : (isnotnull(wr_order_number#34) AND isnotnull(wr_item_sk#33)) + +(33) Project [codegen id : 10] +Output [2]: [wr_item_sk#33, wr_order_number#34] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(34) Exchange +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: hashpartitioning(wr_order_number#34, wr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 11] +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: [wr_order_number#34 ASC NULLS FIRST, wr_item_sk#33 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 13] +Left keys [2]: [ws_order_number#28, ws_item_sk#26] +Right keys [2]: [wr_order_number#34, wr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(37) Filter [codegen id : 13] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] +Condition : isnull(wr_order_number#34) + +(38) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] + +(39) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#36, d_year#37] + +(40) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Input [8]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, d_date_sk#36, d_year#37] + +(42) HashAggregate [codegen id : 13] +Input [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [partial_sum(ws_quantity#29), partial_sum(UnscaledValue(ws_wholesale_cost#30)), partial_sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] + +(43) Exchange +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(44) HashAggregate [codegen id : 14] +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [sum(ws_quantity#29), sum(UnscaledValue(ws_wholesale_cost#30)), sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum(ws_quantity#29)#44, sum(UnscaledValue(ws_wholesale_cost#30))#45, sum(UnscaledValue(ws_sales_price#31))#46] +Results [6]: [d_year#37 AS ws_sold_year#47, ws_item_sk#26, ws_bill_customer_sk#27 AS ws_customer_sk#48, sum(ws_quantity#29)#44 AS ws_qty#49, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#30))#45,17,2) AS ws_wc#50, MakeDecimal(sum(UnscaledValue(ws_sales_price#31))#46,17,2) AS ws_sp#51] + +(45) Filter [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Condition : (coalesce(ws_qty#49, 0) > 0) + +(46) Sort [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Arguments: [ws_sold_year#47 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST, ws_customer_sk#48 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 15] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 15] +Output [9]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51] +Input [12]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] + +(49) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#58)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] + +(51) Filter [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Condition : (isnotnull(cs_item_sk#53) AND isnotnull(cs_bill_customer_sk#52)) + +(52) Exchange +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: hashpartitioning(cs_order_number#54, cs_item_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(53) Sort [codegen id : 17] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: [cs_order_number#54 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST], false, 0 + +(54) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(56) Filter [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Condition : (isnotnull(cr_order_number#60) AND isnotnull(cr_item_sk#59)) + +(57) Project [codegen id : 18] +Output [2]: [cr_item_sk#59, cr_order_number#60] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(58) Exchange +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: hashpartitioning(cr_order_number#60, cr_item_sk#59, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(59) Sort [codegen id : 19] +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: [cr_order_number#60 ASC NULLS FIRST, cr_item_sk#59 ASC NULLS FIRST], false, 0 + +(60) SortMergeJoin [codegen id : 21] +Left keys [2]: [cs_order_number#54, cs_item_sk#53] +Right keys [2]: [cr_order_number#60, cr_item_sk#59] +Join type: LeftOuter +Join condition: None + +(61) Filter [codegen id : 21] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] +Condition : isnull(cr_order_number#60) + +(62) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] + +(63) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#62, d_year#63] + +(64) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [cs_sold_date_sk#58] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Input [8]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, d_date_sk#62, d_year#63] + +(66) HashAggregate [codegen id : 21] +Input [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [partial_sum(cs_quantity#55), partial_sum(UnscaledValue(cs_wholesale_cost#56)), partial_sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum#64, sum#65, sum#66] +Results [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] + +(67) Exchange +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [sum(cs_quantity#55), sum(UnscaledValue(cs_wholesale_cost#56)), sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum(cs_quantity#55)#70, sum(UnscaledValue(cs_wholesale_cost#56))#71, sum(UnscaledValue(cs_sales_price#57))#72] +Results [6]: [d_year#63 AS cs_sold_year#73, cs_item_sk#53, cs_bill_customer_sk#52 AS cs_customer_sk#74, sum(cs_quantity#55)#70 AS cs_qty#75, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#56))#71,17,2) AS cs_wc#76, MakeDecimal(sum(UnscaledValue(cs_sales_price#57))#72,17,2) AS cs_sp#77] + +(69) Filter [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Condition : (coalesce(cs_qty#75, 0) > 0) + +(70) Sort [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Arguments: [cs_sold_year#73 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST, cs_customer_sk#74 ASC NULLS FIRST], false, 0 + +(71) SortMergeJoin [codegen id : 23] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74] +Join type: Inner +Join condition: None + +(72) Project [codegen id : 23] +Output [12]: [round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#49 + cs_qty#75), 1) as double)), 2) AS ratio#78, ss_qty#23 AS store_qty#79, ss_wc#24 AS store_wholesale_cost#80, ss_sp#25 AS store_sales_price#81, (coalesce(ws_qty#49, 0) + coalesce(cs_qty#75, 0)) AS other_chan_qty#82, (coalesce(ws_wc#50, 0.00) + coalesce(cs_wc#76, 0.00)) AS other_chan_wholesale_cost#83, (coalesce(ws_sp#51, 0.00) + coalesce(cs_sp#77, 0.00)) AS other_chan_sales_price#84, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, cs_qty#75] +Input [15]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51, cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] + +(73) TakeOrderedAndProject +Input [12]: [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, cs_qty#75] +Arguments: 100, [ratio#78 ASC NULLS FIRST, ss_qty#23 DESC NULLS LAST, ss_wc#24 DESC NULLS LAST, ss_sp#25 DESC NULLS LAST, other_chan_qty#82 ASC NULLS FIRST, other_chan_wholesale_cost#83 ASC NULLS FIRST, other_chan_sales_price#84 ASC NULLS FIRST, round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#49 + cs_qty#75), 1) as double)), 2) ASC NULLS FIRST], [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..839bcca08a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78.native_iceberg_compat/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (23) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] + InputAdapter + WholeStageCodegen (15) + Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_sold_year,ss_item_sk,ss_customer_sk] + HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + Filter [sr_ticket_number] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #3 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [ws_sold_year,ws_item_sk,ws_customer_sk] + Filter [ws_qty] + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen (13) + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + Filter [wr_order_number] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #6 + WholeStageCodegen (8) + Filter [ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (11) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #7 + WholeStageCodegen (10) + Project [wr_item_sk,wr_order_number] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [cs_sold_year,cs_item_sk,cs_customer_sk] + Filter [cs_qty] + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen (21) + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + Filter [cr_order_number] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (17) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #9 + WholeStageCodegen (16) + Filter [cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (19) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #10 + WholeStageCodegen (18) + Project [cr_item_sk,cr_order_number] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/explain.txt new file mode 100644 index 0000000000..dd7510c121 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet spark_catalog.default.customer (28) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Condition : (((isnotnull(s_number_employees#13) AND (s_number_employees#13 >= 200)) AND (s_number_employees#13 <= 295)) AND isnotnull(s_store_sk#12)) + +(14) Project [codegen id : 2] +Output [2]: [s_store_sk#12, s_city#14] +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] + +(15) BroadcastExchange +Input [2]: [s_store_sk#12, s_city#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12, s_city#14] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 6) OR (hd_vehicle_count#17 > 2)) AND isnotnull(hd_demo_sk#15)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#15] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14, hd_demo_sk#15] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum#18, sum#19] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] + +(26) Exchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#22, sum(UnscaledValue(ss_net_profit#7))#23] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#22,17,2) AS amt#24, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#23,17,2) AS profit#25] + +(28) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] + +(30) Filter [codegen id : 5] +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Condition : isnotnull(c_customer_sk#26) + +(31) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#26] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [7]: [c_last_name#28, c_first_name#27, substr(s_city#14, 1, 30) AS substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25, s_city#14] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#24, profit#25, c_customer_sk#26, c_first_name#27, c_last_name#28] + +(34) TakeOrderedAndProject +Input [7]: [c_last_name#28, c_first_name#27, substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25, s_city#14] +Arguments: 100, [c_last_name#28 ASC NULLS FIRST, c_first_name#27 ASC NULLS FIRST, substr(s_city#14, 1, 30) ASC NULLS FIRST, profit#25 ASC NULLS FIRST], [c_last_name#28, c_first_name#27, substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/simplified.txt new file mode 100644 index 0000000000..138ec9eb1e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [c_last_name,c_first_name,s_city,profit,substr(s_city, 1, 30),ss_ticket_number,amt] + WholeStageCodegen (6) + Project [c_last_name,c_first_name,s_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),amt,profit,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk,s_city] + Filter [s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_number_employees,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..dd7510c121 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/explain.txt @@ -0,0 +1,198 @@ +== Physical Plan == +TakeOrderedAndProject (34) ++- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet spark_catalog.default.customer (28) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_store_sk#4) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ss_sold_date_sk#8, d_date_sk#9] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] +Condition : (((isnotnull(s_number_employees#13) AND (s_number_employees#13 >= 200)) AND (s_number_employees#13 <= 295)) AND isnotnull(s_store_sk#12)) + +(14) Project [codegen id : 2] +Output [2]: [s_store_sk#12, s_city#14] +Input [3]: [s_store_sk#12, s_number_employees#13, s_city#14] + +(15) BroadcastExchange +Input [2]: [s_store_sk#12, s_city#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#12, s_city#14] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Condition : (((hd_dep_count#16 = 6) OR (hd_vehicle_count#17 > 2)) AND isnotnull(hd_demo_sk#15)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#15] +Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14, hd_demo_sk#15] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#14] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum#18, sum#19] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] + +(26) Exchange +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14, sum#20, sum#21] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#14] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#22, sum(UnscaledValue(ss_net_profit#7))#23] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#22,17,2) AS amt#24, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#23,17,2) AS profit#25] + +(28) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] + +(30) Filter [codegen id : 5] +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Condition : isnotnull(c_customer_sk#26) + +(31) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#27, c_last_name#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#26] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [7]: [c_last_name#28, c_first_name#27, substr(s_city#14, 1, 30) AS substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25, s_city#14] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#14, amt#24, profit#25, c_customer_sk#26, c_first_name#27, c_last_name#28] + +(34) TakeOrderedAndProject +Input [7]: [c_last_name#28, c_first_name#27, substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25, s_city#14] +Arguments: 100, [c_last_name#28 ASC NULLS FIRST, c_first_name#27 ASC NULLS FIRST, substr(s_city#14, 1, 30) ASC NULLS FIRST, profit#25 ASC NULLS FIRST], [c_last_name#28, c_first_name#27, substr(s_city, 1, 30)#29, ss_ticket_number#5, amt#24, profit#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..138ec9eb1e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [c_last_name,c_first_name,s_city,profit,substr(s_city, 1, 30),ss_ticket_number,amt] + WholeStageCodegen (6) + Project [c_last_name,c_first_name,s_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),amt,profit,sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [s_store_sk,s_city] + Filter [s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_number_employees,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/explain.txt new file mode 100644 index 0000000000..1abf0a4f8e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/explain.txt @@ -0,0 +1,278 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.store (11) + +- BroadcastExchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * BroadcastHashJoin LeftSemi BuildRight (37) + :- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.customer_address (17) + +- BroadcastExchange (36) + +- * Project (35) + +- * Filter (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.customer_address (21) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.customer (24) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#1, ss_net_profit#2] +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Condition : (isnotnull(s_store_sk#7) AND isnotnull(s_zip#9)) + +(14) BroadcastExchange +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 8] +Output [3]: [ss_net_profit#2, s_store_name#8, s_zip#9] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#7, s_store_name#8, s_zip#9] + +(17) Scan parquet spark_catalog.default.customer_address +Output [1]: [ca_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 6] +Input [1]: [ca_zip#10] + +(19) Filter [codegen id : 6] +Input [1]: [ca_zip#10] +Condition : (substr(ca_zip#10, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#10, 1, 5))) + +(20) Project [codegen id : 6] +Output [1]: [substr(ca_zip#10, 1, 5) AS ca_zip#11] +Input [1]: [ca_zip#10] + +(21) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#12, ca_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#12, ca_zip#13] + +(23) Filter [codegen id : 4] +Input [2]: [ca_address_sk#12, ca_zip#13] +Condition : isnotnull(ca_address_sk#12) + +(24) Scan parquet spark_catalog.default.customer +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(26) Filter [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(27) Project [codegen id : 3] +Output [1]: [c_current_addr_sk#14] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(28) BroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(29) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ca_address_sk#12] +Right keys [1]: [c_current_addr_sk#14] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 4] +Output [1]: [ca_zip#13] +Input [3]: [ca_address_sk#12, ca_zip#13, c_current_addr_sk#14] + +(31) HashAggregate [codegen id : 4] +Input [1]: [ca_zip#13] +Keys [1]: [ca_zip#13] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#16] +Results [2]: [ca_zip#13, count#17] + +(32) Exchange +Input [2]: [ca_zip#13, count#17] +Arguments: hashpartitioning(ca_zip#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(33) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#13, count#17] +Keys [1]: [ca_zip#13] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#18] +Results [2]: [substr(ca_zip#13, 1, 5) AS ca_zip#19, count(1)#18 AS cnt#20] + +(34) Filter [codegen id : 5] +Input [2]: [ca_zip#19, cnt#20] +Condition : (cnt#20 > 10) + +(35) Project [codegen id : 5] +Output [1]: [ca_zip#19] +Input [2]: [ca_zip#19, cnt#20] + +(36) BroadcastExchange +Input [1]: [ca_zip#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [coalesce(ca_zip#11, ), isnull(ca_zip#11)] +Right keys [2]: [coalesce(ca_zip#19, ), isnull(ca_zip#19)] +Join type: LeftSemi +Join condition: None + +(38) HashAggregate [codegen id : 6] +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#11] + +(39) Exchange +Input [1]: [ca_zip#11] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(40) HashAggregate [codegen id : 7] +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#11] + +(41) BroadcastExchange +Input [1]: [ca_zip#11] +Arguments: HashedRelationBroadcastMode(List(substr(input[0, string, true], 1, 2)),false), [plan_id=7] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [substr(s_zip#9, 1, 2)] +Right keys [1]: [substr(ca_zip#11, 1, 2)] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 8] +Output [2]: [ss_net_profit#2, s_store_name#8] +Input [4]: [ss_net_profit#2, s_store_name#8, s_zip#9, ca_zip#11] + +(44) HashAggregate [codegen id : 8] +Input [2]: [ss_net_profit#2, s_store_name#8] +Keys [1]: [s_store_name#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#21] +Results [2]: [s_store_name#8, sum#22] + +(45) Exchange +Input [2]: [s_store_name#8, sum#22] +Arguments: hashpartitioning(s_store_name#8, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(46) HashAggregate [codegen id : 9] +Input [2]: [s_store_name#8, sum#22] +Keys [1]: [s_store_name#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#23] +Results [2]: [s_store_name#8, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#23,17,2) AS sum(ss_net_profit)#24] + +(47) TakeOrderedAndProject +Input [2]: [s_store_name#8, sum(ss_net_profit)#24] +Arguments: 100, [s_store_name#8 ASC NULLS FIRST], [s_store_name#8, sum(ss_net_profit)#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/simplified.txt new file mode 100644 index 0000000000..30895d0bef --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_datafusion/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen (9) + HashAggregate [s_store_name,sum] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [s_store_name,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_store_name] + BroadcastHashJoin [s_zip,ca_zip] + Project [ss_net_profit,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [ca_zip] + InputAdapter + Exchange [ca_zip] #5 + WholeStageCodegen (6) + HashAggregate [ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] + Project [ca_zip] + Filter [ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_zip] + Filter [cnt] + HashAggregate [ca_zip,count] [count(1),ca_zip,cnt,count] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen (4) + HashAggregate [ca_zip] [count,count] + Project [ca_zip] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [c_current_addr_sk] + Filter [c_preferred_cust_flag,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_current_addr_sk,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..1abf0a4f8e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/explain.txt @@ -0,0 +1,278 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.store (11) + +- BroadcastExchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * BroadcastHashJoin LeftSemi BuildRight (37) + :- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.customer_address (17) + +- BroadcastExchange (36) + +- * Project (35) + +- * Filter (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet spark_catalog.default.customer_address (21) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet spark_catalog.default.customer (24) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#1, ss_net_profit#2] +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Condition : (isnotnull(s_store_sk#7) AND isnotnull(s_zip#9)) + +(14) BroadcastExchange +Input [3]: [s_store_sk#7, s_store_name#8, s_zip#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 8] +Output [3]: [ss_net_profit#2, s_store_name#8, s_zip#9] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#7, s_store_name#8, s_zip#9] + +(17) Scan parquet spark_catalog.default.customer_address +Output [1]: [ca_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 6] +Input [1]: [ca_zip#10] + +(19) Filter [codegen id : 6] +Input [1]: [ca_zip#10] +Condition : (substr(ca_zip#10, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#10, 1, 5))) + +(20) Project [codegen id : 6] +Output [1]: [substr(ca_zip#10, 1, 5) AS ca_zip#11] +Input [1]: [ca_zip#10] + +(21) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#12, ca_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#12, ca_zip#13] + +(23) Filter [codegen id : 4] +Input [2]: [ca_address_sk#12, ca_zip#13] +Condition : isnotnull(ca_address_sk#12) + +(24) Scan parquet spark_catalog.default.customer +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(26) Filter [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(27) Project [codegen id : 3] +Output [1]: [c_current_addr_sk#14] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(28) BroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(29) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ca_address_sk#12] +Right keys [1]: [c_current_addr_sk#14] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 4] +Output [1]: [ca_zip#13] +Input [3]: [ca_address_sk#12, ca_zip#13, c_current_addr_sk#14] + +(31) HashAggregate [codegen id : 4] +Input [1]: [ca_zip#13] +Keys [1]: [ca_zip#13] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#16] +Results [2]: [ca_zip#13, count#17] + +(32) Exchange +Input [2]: [ca_zip#13, count#17] +Arguments: hashpartitioning(ca_zip#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(33) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#13, count#17] +Keys [1]: [ca_zip#13] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#18] +Results [2]: [substr(ca_zip#13, 1, 5) AS ca_zip#19, count(1)#18 AS cnt#20] + +(34) Filter [codegen id : 5] +Input [2]: [ca_zip#19, cnt#20] +Condition : (cnt#20 > 10) + +(35) Project [codegen id : 5] +Output [1]: [ca_zip#19] +Input [2]: [ca_zip#19, cnt#20] + +(36) BroadcastExchange +Input [1]: [ca_zip#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [plan_id=5] + +(37) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [coalesce(ca_zip#11, ), isnull(ca_zip#11)] +Right keys [2]: [coalesce(ca_zip#19, ), isnull(ca_zip#19)] +Join type: LeftSemi +Join condition: None + +(38) HashAggregate [codegen id : 6] +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#11] + +(39) Exchange +Input [1]: [ca_zip#11] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(40) HashAggregate [codegen id : 7] +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#11] + +(41) BroadcastExchange +Input [1]: [ca_zip#11] +Arguments: HashedRelationBroadcastMode(List(substr(input[0, string, true], 1, 2)),false), [plan_id=7] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [substr(s_zip#9, 1, 2)] +Right keys [1]: [substr(ca_zip#11, 1, 2)] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 8] +Output [2]: [ss_net_profit#2, s_store_name#8] +Input [4]: [ss_net_profit#2, s_store_name#8, s_zip#9, ca_zip#11] + +(44) HashAggregate [codegen id : 8] +Input [2]: [ss_net_profit#2, s_store_name#8] +Keys [1]: [s_store_name#8] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#21] +Results [2]: [s_store_name#8, sum#22] + +(45) Exchange +Input [2]: [s_store_name#8, sum#22] +Arguments: hashpartitioning(s_store_name#8, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(46) HashAggregate [codegen id : 9] +Input [2]: [s_store_name#8, sum#22] +Keys [1]: [s_store_name#8] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#23] +Results [2]: [s_store_name#8, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#23,17,2) AS sum(ss_net_profit)#24] + +(47) TakeOrderedAndProject +Input [2]: [s_store_name#8, sum(ss_net_profit)#24] +Arguments: 100, [s_store_name#8 ASC NULLS FIRST], [s_store_name#8, sum(ss_net_profit)#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..30895d0bef --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.native_iceberg_compat/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen (9) + HashAggregate [s_store_name,sum] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [s_store_name,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_store_name] + BroadcastHashJoin [s_zip,ca_zip] + Project [ss_net_profit,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [ca_zip] + InputAdapter + Exchange [ca_zip] #5 + WholeStageCodegen (6) + HashAggregate [ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] + Project [ca_zip] + Filter [ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_zip] + Filter [cnt] + HashAggregate [ca_zip,count] [count(1),ca_zip,cnt,count] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen (4) + HashAggregate [ca_zip] [count,count] + Project [ca_zip] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [c_current_addr_sk] + Filter [c_preferred_cust_flag,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_current_addr_sk,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/explain.txt new file mode 100644 index 0000000000..fd964a80ef --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/explain.txt @@ -0,0 +1,631 @@ +== Physical Plan == +TakeOrderedAndProject (111) ++- * HashAggregate (110) + +- Exchange (109) + +- * HashAggregate (108) + +- * Expand (107) + +- Union (106) + :- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Project (20) + : : : : +- * BroadcastHashJoin Inner BuildRight (19) + : : : : :- * Project (13) + : : : : : +- * SortMergeJoin LeftOuter (12) + : : : : : :- * Sort (5) + : : : : : : +- Exchange (4) + : : : : : : +- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : +- * Sort (11) + : : : : : +- Exchange (10) + : : : : : +- * Project (9) + : : : : : +- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : : : +- BroadcastExchange (18) + : : : : +- * Project (17) + : : : : +- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet spark_catalog.default.date_dim (14) + : : : +- BroadcastExchange (24) + : : : +- * Filter (23) + : : : +- * ColumnarToRow (22) + : : : +- Scan parquet spark_catalog.default.store (21) + : : +- BroadcastExchange (31) + : : +- * Project (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet spark_catalog.default.item (27) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet spark_catalog.default.promotion (34) + :- * HashAggregate (74) + : +- Exchange (73) + : +- * HashAggregate (72) + : +- * Project (71) + : +- * BroadcastHashJoin Inner BuildRight (70) + : :- * Project (68) + : : +- * BroadcastHashJoin Inner BuildRight (67) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * Project (59) + : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : :- * Project (56) + : : : : : +- * SortMergeJoin LeftOuter (55) + : : : : : :- * Sort (48) + : : : : : : +- Exchange (47) + : : : : : : +- * Filter (46) + : : : : : : +- * ColumnarToRow (45) + : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (44) + : : : : : +- * Sort (54) + : : : : : +- Exchange (53) + : : : : : +- * Project (52) + : : : : : +- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet spark_catalog.default.catalog_returns (49) + : : : : +- ReusedExchange (57) + : : : +- BroadcastExchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet spark_catalog.default.catalog_page (60) + : : +- ReusedExchange (66) + : +- ReusedExchange (69) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * Project (102) + +- * BroadcastHashJoin Inner BuildRight (101) + :- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- * Project (96) + : : +- * BroadcastHashJoin Inner BuildRight (95) + : : :- * Project (90) + : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : :- * Project (87) + : : : : +- * SortMergeJoin LeftOuter (86) + : : : : :- * Sort (79) + : : : : : +- Exchange (78) + : : : : : +- * Filter (77) + : : : : : +- * ColumnarToRow (76) + : : : : : +- Scan parquet spark_catalog.default.web_sales (75) + : : : : +- * Sort (85) + : : : : +- Exchange (84) + : : : : +- * Project (83) + : : : : +- * Filter (82) + : : : : +- * ColumnarToRow (81) + : : : : +- Scan parquet spark_catalog.default.web_returns (80) + : : : +- ReusedExchange (88) + : : +- BroadcastExchange (94) + : : +- * Filter (93) + : : +- * ColumnarToRow (92) + : : +- Scan parquet spark_catalog.default.web_site (91) + : +- ReusedExchange (97) + +- ReusedExchange (100) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(8) Filter [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(9) Project [codegen id : 3] +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(10) Exchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join type: LeftOuter +Join condition: None + +(13) Project [codegen id : 9] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 2000-08-23)) AND (d_date#14 <= 2000-09-22)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 5] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(21) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_store_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] + +(23) Filter [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(24) BroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] + +(27) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_current_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] + +(29) Filter [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(30) Project [codegen id : 7] +Output [1]: [i_item_sk#17] +Input [2]: [i_item_sk#17, i_current_price#18] + +(31) BroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] + +(34) Scan parquet spark_catalog.default.promotion +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(36) Filter [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(37) Project [codegen id : 8] +Output [1]: [p_promo_sk#19] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(38) BroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_promo_sk#3] +Right keys [1]: [p_promo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] + +(41) HashAggregate [codegen id : 9] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] + +(42) Exchange +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(43) HashAggregate [codegen id : 10] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#34, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32 AS returns#35, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33 AS profit#36, store channel AS channel#37, concat(store, s_store_id#16) AS id#38] + +(44) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(46) Filter [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) + +(47) Exchange +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(48) Sort [codegen id : 12] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 + +(49) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(51) Filter [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) + +(52) Project [codegen id : 13] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(53) Exchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(54) Sort [codegen id : 14] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 19] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] +Join type: LeftOuter +Join condition: None + +(56) Project [codegen id : 19] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] + +(57) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#51] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 19] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#51] + +(60) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(62) Filter [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Condition : isnotnull(cp_catalog_page_sk#52) + +(63) BroadcastExchange +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(64) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#52] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 19] +Output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(66) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#54] + +(67) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#54] +Join type: Inner +Join condition: None + +(68) Project [codegen id : 19] +Output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, i_item_sk#54] + +(69) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#55] + +(70) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(71) Project [codegen id : 19] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, p_promo_sk#55] + +(72) HashAggregate [codegen id : 19] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] + +(73) Exchange +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#53, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) HashAggregate [codegen id : 20] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#69, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#70, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68 AS profit#71, catalog channel AS channel#72, concat(catalog_page, cp_catalog_page_id#53) AS id#73] + +(75) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#80)] +PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] + +(77) Filter [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) + +(78) Exchange +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(79) Sort [codegen id : 22] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 + +(80) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(82) Filter [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) + +(83) Project [codegen id : 23] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(84) Exchange +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(85) Sort [codegen id : 24] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin [codegen id : 29] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] +Join type: LeftOuter +Join condition: None + +(87) Project [codegen id : 29] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] + +(88) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#86] + +(89) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#86] +Join type: Inner +Join condition: None + +(90) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#86] + +(91) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#87, web_site_id#88] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] + +(93) Filter [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] +Condition : isnotnull(web_site_sk#87) + +(94) BroadcastExchange +Input [2]: [web_site_sk#87, web_site_id#88] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#87] +Join type: Inner +Join condition: None + +(96) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#87, web_site_id#88] + +(97) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#89] + +(98) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#89] +Join type: Inner +Join condition: None + +(99) Project [codegen id : 29] +Output [6]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [8]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, i_item_sk#89] + +(100) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#90] + +(101) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#90] +Join type: Inner +Join condition: None + +(102) Project [codegen id : 29] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [7]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, p_promo_sk#90] + +(103) HashAggregate [codegen id : 29] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Keys [1]: [web_site_id#88] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] + +(104) Exchange +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#88, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(105) HashAggregate [codegen id : 30] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#88] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#104, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#105, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103 AS profit#106, web channel AS channel#107, concat(web_site, web_site_id#88) AS id#108] + +(106) Union + +(107) Expand [codegen id : 31] +Input [5]: [sales#34, returns#35, profit#36, channel#37, id#38] +Arguments: [[sales#34, returns#35, profit#36, channel#37, id#38, 0], [sales#34, returns#35, profit#36, channel#37, null, 1], [sales#34, returns#35, profit#36, null, null, 3]], [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] + +(108) HashAggregate [codegen id : 31] +Input [6]: [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [partial_sum(sales#34), partial_sum(returns#35), partial_sum(profit#36)] +Aggregate Attributes [6]: [sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Results [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] + +(109) Exchange +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Arguments: hashpartitioning(channel#109, id#110, spark_grouping_id#111, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(110) HashAggregate [codegen id : 32] +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [sum(sales#34), sum(returns#35), sum(profit#36)] +Aggregate Attributes [3]: [sum(sales#34)#124, sum(returns#35)#125, sum(profit#36)#126] +Results [5]: [channel#109, id#110, sum(sales#34)#124 AS sales#127, sum(returns#35)#125 AS returns#128, sum(profit#36)#126 AS profit#129] + +(111) TakeOrderedAndProject +Input [5]: [channel#109, id#110, sales#127, returns#128, profit#129] +Arguments: 100, [channel#109 ASC NULLS FIRST, id#110 ASC NULLS FIRST], [channel#109, id#110, sales#127, returns#128, profit#129] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/simplified.txt new file mode 100644 index 0000000000..3b9ea7c700 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_datafusion/simplified.txt @@ -0,0 +1,178 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (32) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (31) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (9) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (1) + Filter [ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #4 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #9 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #10 + WholeStageCodegen (11) + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #11 + WholeStageCodegen (13) + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (16) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (30) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [web_site_id] #13 + WholeStageCodegen (29) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss] + SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + InputAdapter + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #14 + WholeStageCodegen (21) + Filter [ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #15 + WholeStageCodegen (23) + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (26) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..fd964a80ef --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/explain.txt @@ -0,0 +1,631 @@ +== Physical Plan == +TakeOrderedAndProject (111) ++- * HashAggregate (110) + +- Exchange (109) + +- * HashAggregate (108) + +- * Expand (107) + +- Union (106) + :- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Project (20) + : : : : +- * BroadcastHashJoin Inner BuildRight (19) + : : : : :- * Project (13) + : : : : : +- * SortMergeJoin LeftOuter (12) + : : : : : :- * Sort (5) + : : : : : : +- Exchange (4) + : : : : : : +- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : +- * Sort (11) + : : : : : +- Exchange (10) + : : : : : +- * Project (9) + : : : : : +- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : : : +- BroadcastExchange (18) + : : : : +- * Project (17) + : : : : +- * Filter (16) + : : : : +- * ColumnarToRow (15) + : : : : +- Scan parquet spark_catalog.default.date_dim (14) + : : : +- BroadcastExchange (24) + : : : +- * Filter (23) + : : : +- * ColumnarToRow (22) + : : : +- Scan parquet spark_catalog.default.store (21) + : : +- BroadcastExchange (31) + : : +- * Project (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet spark_catalog.default.item (27) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet spark_catalog.default.promotion (34) + :- * HashAggregate (74) + : +- Exchange (73) + : +- * HashAggregate (72) + : +- * Project (71) + : +- * BroadcastHashJoin Inner BuildRight (70) + : :- * Project (68) + : : +- * BroadcastHashJoin Inner BuildRight (67) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * Project (59) + : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : :- * Project (56) + : : : : : +- * SortMergeJoin LeftOuter (55) + : : : : : :- * Sort (48) + : : : : : : +- Exchange (47) + : : : : : : +- * Filter (46) + : : : : : : +- * ColumnarToRow (45) + : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (44) + : : : : : +- * Sort (54) + : : : : : +- Exchange (53) + : : : : : +- * Project (52) + : : : : : +- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet spark_catalog.default.catalog_returns (49) + : : : : +- ReusedExchange (57) + : : : +- BroadcastExchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet spark_catalog.default.catalog_page (60) + : : +- ReusedExchange (66) + : +- ReusedExchange (69) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * Project (102) + +- * BroadcastHashJoin Inner BuildRight (101) + :- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- * Project (96) + : : +- * BroadcastHashJoin Inner BuildRight (95) + : : :- * Project (90) + : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : :- * Project (87) + : : : : +- * SortMergeJoin LeftOuter (86) + : : : : :- * Sort (79) + : : : : : +- Exchange (78) + : : : : : +- * Filter (77) + : : : : : +- * ColumnarToRow (76) + : : : : : +- Scan parquet spark_catalog.default.web_sales (75) + : : : : +- * Sort (85) + : : : : +- Exchange (84) + : : : : +- * Project (83) + : : : : +- * Filter (82) + : : : : +- * ColumnarToRow (81) + : : : : +- Scan parquet spark_catalog.default.web_returns (80) + : : : +- ReusedExchange (88) + : : +- BroadcastExchange (94) + : : +- * Filter (93) + : : +- * ColumnarToRow (92) + : : +- Scan parquet spark_catalog.default.web_site (91) + : +- ReusedExchange (97) + +- ReusedExchange (100) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(8) Filter [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(9) Project [codegen id : 3] +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(10) Exchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join type: LeftOuter +Join condition: None + +(13) Project [codegen id : 9] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 2000-08-23)) AND (d_date#14 <= 2000-09-22)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 5] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(21) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_store_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] + +(23) Filter [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(24) BroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] + +(27) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_current_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] + +(29) Filter [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(30) Project [codegen id : 7] +Output [1]: [i_item_sk#17] +Input [2]: [i_item_sk#17, i_current_price#18] + +(31) BroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] + +(34) Scan parquet spark_catalog.default.promotion +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(36) Filter [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(37) Project [codegen id : 8] +Output [1]: [p_promo_sk#19] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(38) BroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_promo_sk#3] +Right keys [1]: [p_promo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] + +(41) HashAggregate [codegen id : 9] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] + +(42) Exchange +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(43) HashAggregate [codegen id : 10] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#34, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32 AS returns#35, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33 AS profit#36, store channel AS channel#37, concat(store, s_store_id#16) AS id#38] + +(44) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(46) Filter [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) + +(47) Exchange +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(48) Sort [codegen id : 12] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 + +(49) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(51) Filter [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) + +(52) Project [codegen id : 13] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(53) Exchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(54) Sort [codegen id : 14] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 19] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] +Join type: LeftOuter +Join condition: None + +(56) Project [codegen id : 19] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] + +(57) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#51] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 19] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#51] + +(60) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(62) Filter [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Condition : isnotnull(cp_catalog_page_sk#52) + +(63) BroadcastExchange +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(64) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#52] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 19] +Output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(66) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#54] + +(67) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#54] +Join type: Inner +Join condition: None + +(68) Project [codegen id : 19] +Output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, i_item_sk#54] + +(69) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#55] + +(70) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(71) Project [codegen id : 19] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, p_promo_sk#55] + +(72) HashAggregate [codegen id : 19] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] + +(73) Exchange +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#53, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) HashAggregate [codegen id : 20] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#69, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#70, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68 AS profit#71, catalog channel AS channel#72, concat(catalog_page, cp_catalog_page_id#53) AS id#73] + +(75) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#80)] +PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] + +(77) Filter [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) + +(78) Exchange +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(79) Sort [codegen id : 22] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 + +(80) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(82) Filter [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) + +(83) Project [codegen id : 23] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(84) Exchange +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(85) Sort [codegen id : 24] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin [codegen id : 29] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] +Join type: LeftOuter +Join condition: None + +(87) Project [codegen id : 29] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] + +(88) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#86] + +(89) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#86] +Join type: Inner +Join condition: None + +(90) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#86] + +(91) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#87, web_site_id#88] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] + +(93) Filter [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] +Condition : isnotnull(web_site_sk#87) + +(94) BroadcastExchange +Input [2]: [web_site_sk#87, web_site_id#88] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#87] +Join type: Inner +Join condition: None + +(96) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#87, web_site_id#88] + +(97) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#89] + +(98) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#89] +Join type: Inner +Join condition: None + +(99) Project [codegen id : 29] +Output [6]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [8]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, i_item_sk#89] + +(100) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#90] + +(101) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#90] +Join type: Inner +Join condition: None + +(102) Project [codegen id : 29] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [7]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, p_promo_sk#90] + +(103) HashAggregate [codegen id : 29] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Keys [1]: [web_site_id#88] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] + +(104) Exchange +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#88, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(105) HashAggregate [codegen id : 30] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#88] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#104, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#105, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103 AS profit#106, web channel AS channel#107, concat(web_site, web_site_id#88) AS id#108] + +(106) Union + +(107) Expand [codegen id : 31] +Input [5]: [sales#34, returns#35, profit#36, channel#37, id#38] +Arguments: [[sales#34, returns#35, profit#36, channel#37, id#38, 0], [sales#34, returns#35, profit#36, channel#37, null, 1], [sales#34, returns#35, profit#36, null, null, 3]], [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] + +(108) HashAggregate [codegen id : 31] +Input [6]: [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [partial_sum(sales#34), partial_sum(returns#35), partial_sum(profit#36)] +Aggregate Attributes [6]: [sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Results [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] + +(109) Exchange +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Arguments: hashpartitioning(channel#109, id#110, spark_grouping_id#111, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(110) HashAggregate [codegen id : 32] +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [sum(sales#34), sum(returns#35), sum(profit#36)] +Aggregate Attributes [3]: [sum(sales#34)#124, sum(returns#35)#125, sum(profit#36)#126] +Results [5]: [channel#109, id#110, sum(sales#34)#124 AS sales#127, sum(returns#35)#125 AS returns#128, sum(profit#36)#126 AS profit#129] + +(111) TakeOrderedAndProject +Input [5]: [channel#109, id#110, sales#127, returns#128, profit#129] +Arguments: 100, [channel#109 ASC NULLS FIRST, id#110 ASC NULLS FIRST], [channel#109, id#110, sales#127, returns#128, profit#129] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3b9ea7c700 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.native_iceberg_compat/simplified.txt @@ -0,0 +1,178 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (32) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen (31) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen (9) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #3 + WholeStageCodegen (1) + Filter [ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #4 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (7) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #9 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #10 + WholeStageCodegen (11) + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #11 + WholeStageCodegen (13) + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (16) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (30) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [web_site_id] #13 + WholeStageCodegen (29) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss] + SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + InputAdapter + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #14 + WholeStageCodegen (21) + Filter [ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #15 + WholeStageCodegen (23) + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (26) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/explain.txt new file mode 100644 index 0000000000..829e66974d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * Project (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet spark_catalog.default.catalog_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet spark_catalog.default.customer (40) + +- BroadcastExchange (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet spark_catalog.default.customer_address (46) + + +(1) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#4)] +PushedFilters: [IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : (isnotnull(cr_returning_addr_sk#2) AND isnotnull(cr_returning_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] + +(18) Exchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 11] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [3]: [cr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(21) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#4)] +PushedFilters: [IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] + +(23) Filter [codegen id : 6] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : isnotnull(cr_returning_addr_sk#2) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#7, ca_state#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] + +(30) HashAggregate [codegen id : 6] +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum#15] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] + +(31) Exchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(32) HashAggregate [codegen id : 7] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [2]: [ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#13, ctr_total_return#14] +Keys [1]: [ctr_state#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [ctr_state#13, sum#19, count#20] + +(34) Exchange +Input [3]: [ctr_state#13, sum#19, count#20] +Arguments: hashpartitioning(ctr_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#13, sum#19, count#20] +Keys [1]: [ctr_state#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#21] +Results [2]: [(avg(ctr_total_return#14)#21 * 1.2) AS (avg(ctr_total_return) * 1.2)#22, ctr_state#13 AS ctr_state#13#23] + +(36) Filter [codegen id : 8] +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#22) + +(37) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#13#23] +Join type: Inner +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#22) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] + +(40) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] + +(42) Filter [codegen id : 9] +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#26)) + +(43) BroadcastExchange +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#24] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 11] +Output [6]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Input [8]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] + +(46) Scan parquet spark_catalog.default.customer_address +Output [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] + +(48) Filter [codegen id : 10] +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Condition : ((isnotnull(ca_state#37) AND (ca_state#37 = GA)) AND isnotnull(ca_address_sk#30)) + +(49) BroadcastExchange +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#26] +Right keys [1]: [ca_address_sk#30] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 11] +Output [16]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] +Input [18]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] + +(52) TakeOrderedAndProject +Input [16]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST, c_salutation#27 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, c_last_name#29 ASC NULLS FIRST, ca_street_number#31 ASC NULLS FIRST, ca_street_name#32 ASC NULLS FIRST, ca_street_type#33 ASC NULLS FIRST, ca_suite_number#34 ASC NULLS FIRST, ca_city#35 ASC NULLS FIRST, ca_county#36 ASC NULLS FIRST, ca_state#37 ASC NULLS FIRST, ca_zip#38 ASC NULLS FIRST, ca_country#39 ASC NULLS FIRST, ca_gmt_offset#40 ASC NULLS FIRST, ca_location_type#41 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/simplified.txt new file mode 100644 index 0000000000..2defde832f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (3) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returning_addr_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_state,sum,count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (6) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..829e66974d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * Project (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet spark_catalog.default.catalog_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet spark_catalog.default.customer (40) + +- BroadcastExchange (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet spark_catalog.default.customer_address (46) + + +(1) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#4)] +PushedFilters: [IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : (isnotnull(cr_returning_addr_sk#2) AND isnotnull(cr_returning_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#7, ca_state#8] +Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#7, ca_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] + +(18) Exchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 11] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [3]: [cr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) + +(21) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#4)] +PushedFilters: [IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] + +(23) Filter [codegen id : 6] +Input [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] +Condition : isnotnull(cr_returning_addr_sk#2) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returned_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#7, ca_state#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returning_addr_sk#2] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, ca_address_sk#7, ca_state#8] + +(30) HashAggregate [codegen id : 6] +Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum#15] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] + +(31) Exchange +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(32) HashAggregate [codegen id : 7] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] +Keys [2]: [cr_returning_customer_sk#1, ca_state#8] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [2]: [ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#13, ctr_total_return#14] +Keys [1]: [ctr_state#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [ctr_state#13, sum#19, count#20] + +(34) Exchange +Input [3]: [ctr_state#13, sum#19, count#20] +Arguments: hashpartitioning(ctr_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#13, sum#19, count#20] +Keys [1]: [ctr_state#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#21] +Results [2]: [(avg(ctr_total_return#14)#21 * 1.2) AS (avg(ctr_total_return) * 1.2)#22, ctr_state#13 AS ctr_state#13#23] + +(36) Filter [codegen id : 8] +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#22) + +(37) BroadcastExchange +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#13#23] +Join type: Inner +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#22) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] + +(40) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] + +(42) Filter [codegen id : 9] +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#26)) + +(43) BroadcastExchange +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#24] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 11] +Output [6]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Input [8]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] + +(46) Scan parquet spark_catalog.default.customer_address +Output [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] + +(48) Filter [codegen id : 10] +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Condition : ((isnotnull(ca_state#37) AND (ca_state#37 = GA)) AND isnotnull(ca_address_sk#30)) + +(49) BroadcastExchange +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#26] +Right keys [1]: [ca_address_sk#30] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 11] +Output [16]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] +Input [18]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] + +(52) TakeOrderedAndProject +Input [16]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST, c_salutation#27 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, c_last_name#29 ASC NULLS FIRST, ca_street_number#31 ASC NULLS FIRST, ca_street_name#32 ASC NULLS FIRST, ca_street_type#33 ASC NULLS FIRST, ca_suite_number#34 ASC NULLS FIRST, ca_city#35 ASC NULLS FIRST, ca_county#36 ASC NULLS FIRST, ca_state#37 ASC NULLS FIRST, ca_zip#38 ASC NULLS FIRST, ca_country#39 ASC NULLS FIRST, ca_gmt_offset#40 ASC NULLS FIRST, ca_location_type#41 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2defde832f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(avg(ctr_total_return) * 1.2)] + Filter [ctr_total_return] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (3) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returning_addr_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (8) + Filter [(avg(ctr_total_return) * 1.2)] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(avg(ctr_total_return) * 1.2),ctr_state,sum,count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (6) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/explain.txt new file mode 100644 index 0000000000..67550028f1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/explain.txt @@ -0,0 +1,169 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildLeft (24) + :- BroadcastExchange (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + +- * Project (23) + +- * Filter (22) + +- * ColumnarToRow (21) + +- Scan parquet spark_catalog.default.store_sales (20) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), LessThanOrEqual(i_current_price,92.00), In(i_manufact_id, [129,270,423,821]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (i_current_price#4 <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#8)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_item_sk#6, inv_date_sk#8] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(9) BroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#6] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] + +(12) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-05-25)) AND (d_date#10 <= 2000-07-24)) AND isnotnull(d_date_sk#9)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(16) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] + +(19) BroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] + +(22) Filter +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) + +(23) Project +Output [1]: [ss_item_sk#11] +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] + +(26) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(27) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(29) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1ab87aee6a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen (5) + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (3) + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [inv_item_sk,inv_date_sk] + Filter [inv_quantity_on_hand,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + Project [ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..67550028f1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/explain.txt @@ -0,0 +1,169 @@ +== Physical Plan == +TakeOrderedAndProject (29) ++- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildLeft (24) + :- BroadcastExchange (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + +- * Project (23) + +- * Filter (22) + +- * ColumnarToRow (21) + +- Scan parquet spark_catalog.default.store_sales (20) + + +(1) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), LessThanOrEqual(i_current_price,92.00), In(i_manufact_id, [129,270,423,821]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 3] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (i_current_price#4 <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#8)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_item_sk#6, inv_date_sk#8] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] + +(9) BroadcastExchange +Input [2]: [inv_item_sk#6, inv_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#6] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] + +(12) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-05-25)) AND (d_date#10 <= 2000-07-24)) AND isnotnull(d_date_sk#9)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(16) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [inv_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#9] + +(19) BroadcastExchange +Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(21) ColumnarToRow +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] + +(22) Filter +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) + +(23) Project +Output [1]: [ss_item_sk#11] +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] + +(26) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(27) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(29) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1ab87aee6a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.native_iceberg_compat/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen (5) + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (3) + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [inv_item_sk,inv_date_sk] + Filter [inv_quantity_on_hand,inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + Project [ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/explain.txt new file mode 100644 index 0000000000..2a4ba79bf8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/explain.txt @@ -0,0 +1,357 @@ +== Physical Plan == +TakeOrderedAndProject (61) ++- * Project (60) + +- * BroadcastHashJoin Inner BuildRight (59) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_returns (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin LeftSemi BuildRight (23) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin LeftSemi BuildRight (20) + : : :- * ColumnarToRow (14) + : : : +- Scan parquet spark_catalog.default.date_dim (13) + : : +- BroadcastExchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet spark_catalog.default.date_dim (15) + : +- BroadcastExchange (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet spark_catalog.default.catalog_returns (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- BroadcastExchange (58) + +- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * BroadcastHashJoin Inner BuildRight (53) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Filter (48) + : : +- * ColumnarToRow (47) + : : +- Scan parquet spark_catalog.default.web_returns (46) + : +- ReusedExchange (49) + +- ReusedExchange (52) + + +(1) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_date#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#6, d_date#7] + +(12) Filter [codegen id : 4] +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(13) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#10, d_week_seq#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#10, d_week_seq#11] + +(17) Filter [codegen id : 2] +Input [2]: [d_date#10, d_week_seq#11] +Condition : cast(d_date#10 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(18) Project [codegen id : 2] +Output [1]: [d_week_seq#11] +Input [2]: [d_date#10, d_week_seq#11] + +(19) BroadcastExchange +Input [1]: [d_week_seq#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_week_seq#9] +Right keys [1]: [d_week_seq#11] +Join type: LeftSemi +Join condition: None + +(21) Project [codegen id : 3] +Output [1]: [d_date#8] +Input [2]: [d_date#8, d_week_seq#9] + +(22) BroadcastExchange +Input [1]: [d_date#8] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [d_date#7] +Right keys [1]: [d_date#8] +Join type: LeftSemi +Join condition: None + +(24) Project [codegen id : 4] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_date#7] + +(25) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#3] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 5] +Output [2]: [sr_return_quantity#2, i_item_id#5] +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5, d_date_sk#6] + +(28) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#12] +Results [2]: [i_item_id#5, sum#13] + +(29) Exchange +Input [2]: [i_item_id#5, sum#13] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(30) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#5, sum#13] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#14] +Results [2]: [i_item_id#5 AS item_id#15, sum(sr_return_quantity#2)#14 AS sr_item_qty#16] + +(31) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#19)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 10] +Input [3]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19] + +(33) Filter [codegen id : 10] +Input [3]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19] +Condition : isnotnull(cr_item_sk#17) + +(34) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#20, i_item_id#21] + +(35) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#17] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 10] +Output [3]: [cr_return_quantity#18, cr_returned_date_sk#19, i_item_id#21] +Input [5]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19, i_item_sk#20, i_item_id#21] + +(37) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#22] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#19] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [cr_return_quantity#18, i_item_id#21] +Input [4]: [cr_return_quantity#18, cr_returned_date_sk#19, i_item_id#21, d_date_sk#22] + +(40) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#18, i_item_id#21] +Keys [1]: [i_item_id#21] +Functions [1]: [partial_sum(cr_return_quantity#18)] +Aggregate Attributes [1]: [sum#23] +Results [2]: [i_item_id#21, sum#24] + +(41) Exchange +Input [2]: [i_item_id#21, sum#24] +Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(42) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#21, sum#24] +Keys [1]: [i_item_id#21] +Functions [1]: [sum(cr_return_quantity#18)] +Aggregate Attributes [1]: [sum(cr_return_quantity#18)#25] +Results [2]: [i_item_id#21 AS item_id#26, sum(cr_return_quantity#18)#25 AS cr_item_qty#27] + +(43) BroadcastExchange +Input [2]: [item_id#26, cr_item_qty#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#26] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 18] +Output [3]: [item_id#15, sr_item_qty#16, cr_item_qty#27] +Input [4]: [item_id#15, sr_item_qty#16, item_id#26, cr_item_qty#27] + +(46) Scan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#30)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] + +(48) Filter [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Condition : isnotnull(wr_item_sk#28) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#31, i_item_id#32] + +(50) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#28] +Right keys [1]: [i_item_sk#31] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 16] +Output [3]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32] +Input [5]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, i_item_sk#31, i_item_id#32] + +(52) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#33] + +(53) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#30] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 16] +Output [2]: [wr_return_quantity#29, i_item_id#32] +Input [4]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32, d_date_sk#33] + +(55) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#29, i_item_id#32] +Keys [1]: [i_item_id#32] +Functions [1]: [partial_sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#32, sum#35] + +(56) Exchange +Input [2]: [i_item_id#32, sum#35] +Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(57) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#32, sum#35] +Keys [1]: [i_item_id#32] +Functions [1]: [sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum(wr_return_quantity#29)#36] +Results [2]: [i_item_id#32 AS item_id#37, sum(wr_return_quantity#29)#36 AS wr_item_qty#38] + +(58) BroadcastExchange +Input [2]: [item_id#37, wr_item_qty#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#37] +Join type: Inner +Join condition: None + +(60) Project [codegen id : 18] +Output [8]: [item_id#15, sr_item_qty#16, (((cast(sr_item_qty#16 as double) / cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as double)) / 3.0) * 100.0) AS sr_dev#39, cr_item_qty#27, (((cast(cr_item_qty#27 as double) / cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as double)) / 3.0) * 100.0) AS cr_dev#40, wr_item_qty#38, (((cast(wr_item_qty#38 as double) / cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as double)) / 3.0) * 100.0) AS wr_dev#41, (cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as decimal(20,0)) / 3.0) AS average#42] +Input [5]: [item_id#15, sr_item_qty#16, cr_item_qty#27, item_id#37, wr_item_qty#38] + +(61) TakeOrderedAndProject +Input [8]: [item_id#15, sr_item_qty#16, sr_dev#39, cr_item_qty#27, cr_dev#40, wr_item_qty#38, wr_dev#41, average#42] +Arguments: 100, [item_id#15 ASC NULLS FIRST, sr_item_qty#16 ASC NULLS FIRST], [item_id#15, sr_item_qty#16, sr_dev#39, cr_item_qty#27, cr_dev#40, wr_item_qty#38, wr_dev#41, average#42] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b186ed0d51 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_datafusion/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_return_quantity,sr_returned_date_sk,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Filter [sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Filter [cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(wr_return_quantity),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_return_quantity,wr_returned_date_sk,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Filter [wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..2a4ba79bf8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/explain.txt @@ -0,0 +1,357 @@ +== Physical Plan == +TakeOrderedAndProject (61) ++- * Project (60) + +- * BroadcastHashJoin Inner BuildRight (59) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_returns (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.item (4) + : : +- BroadcastExchange (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin LeftSemi BuildRight (23) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin LeftSemi BuildRight (20) + : : :- * ColumnarToRow (14) + : : : +- Scan parquet spark_catalog.default.date_dim (13) + : : +- BroadcastExchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet spark_catalog.default.date_dim (15) + : +- BroadcastExchange (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet spark_catalog.default.catalog_returns (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- BroadcastExchange (58) + +- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * BroadcastHashJoin Inner BuildRight (53) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Filter (48) + : : +- * ColumnarToRow (47) + : : +- Scan parquet spark_catalog.default.web_returns (46) + : +- ReusedExchange (49) + +- ReusedExchange (52) + + +(1) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#3)] +PushedFilters: [IsNotNull(sr_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3] +Condition : isnotnull(sr_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5] +Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_date#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#6, d_date#7] + +(12) Filter [codegen id : 4] +Input [2]: [d_date_sk#6, d_date#7] +Condition : isnotnull(d_date_sk#6) + +(13) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date#10, d_week_seq#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#10, d_week_seq#11] + +(17) Filter [codegen id : 2] +Input [2]: [d_date#10, d_week_seq#11] +Condition : cast(d_date#10 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(18) Project [codegen id : 2] +Output [1]: [d_week_seq#11] +Input [2]: [d_date#10, d_week_seq#11] + +(19) BroadcastExchange +Input [1]: [d_week_seq#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_week_seq#9] +Right keys [1]: [d_week_seq#11] +Join type: LeftSemi +Join condition: None + +(21) Project [codegen id : 3] +Output [1]: [d_date#8] +Input [2]: [d_date#8, d_week_seq#9] + +(22) BroadcastExchange +Input [1]: [d_date#8] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [d_date#7] +Right keys [1]: [d_date#8] +Join type: LeftSemi +Join condition: None + +(24) Project [codegen id : 4] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_date#7] + +(25) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#3] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 5] +Output [2]: [sr_return_quantity#2, i_item_id#5] +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#5, d_date_sk#6] + +(28) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#2, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum#12] +Results [2]: [i_item_id#5, sum#13] + +(29) Exchange +Input [2]: [i_item_id#5, sum#13] +Arguments: hashpartitioning(i_item_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(30) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#5, sum#13] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(sr_return_quantity#2)] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#14] +Results [2]: [i_item_id#5 AS item_id#15, sum(sr_return_quantity#2)#14 AS sr_item_qty#16] + +(31) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#19)] +PushedFilters: [IsNotNull(cr_item_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 10] +Input [3]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19] + +(33) Filter [codegen id : 10] +Input [3]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19] +Condition : isnotnull(cr_item_sk#17) + +(34) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#20, i_item_id#21] + +(35) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#17] +Right keys [1]: [i_item_sk#20] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 10] +Output [3]: [cr_return_quantity#18, cr_returned_date_sk#19, i_item_id#21] +Input [5]: [cr_item_sk#17, cr_return_quantity#18, cr_returned_date_sk#19, i_item_sk#20, i_item_id#21] + +(37) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#22] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#19] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [cr_return_quantity#18, i_item_id#21] +Input [4]: [cr_return_quantity#18, cr_returned_date_sk#19, i_item_id#21, d_date_sk#22] + +(40) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#18, i_item_id#21] +Keys [1]: [i_item_id#21] +Functions [1]: [partial_sum(cr_return_quantity#18)] +Aggregate Attributes [1]: [sum#23] +Results [2]: [i_item_id#21, sum#24] + +(41) Exchange +Input [2]: [i_item_id#21, sum#24] +Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(42) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#21, sum#24] +Keys [1]: [i_item_id#21] +Functions [1]: [sum(cr_return_quantity#18)] +Aggregate Attributes [1]: [sum(cr_return_quantity#18)#25] +Results [2]: [i_item_id#21 AS item_id#26, sum(cr_return_quantity#18)#25 AS cr_item_qty#27] + +(43) BroadcastExchange +Input [2]: [item_id#26, cr_item_qty#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#26] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 18] +Output [3]: [item_id#15, sr_item_qty#16, cr_item_qty#27] +Input [4]: [item_id#15, sr_item_qty#16, item_id#26, cr_item_qty#27] + +(46) Scan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#30)] +PushedFilters: [IsNotNull(wr_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] + +(48) Filter [codegen id : 16] +Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Condition : isnotnull(wr_item_sk#28) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#31, i_item_id#32] + +(50) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#28] +Right keys [1]: [i_item_sk#31] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 16] +Output [3]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32] +Input [5]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, i_item_sk#31, i_item_id#32] + +(52) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#33] + +(53) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#30] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 16] +Output [2]: [wr_return_quantity#29, i_item_id#32] +Input [4]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32, d_date_sk#33] + +(55) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#29, i_item_id#32] +Keys [1]: [i_item_id#32] +Functions [1]: [partial_sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#32, sum#35] + +(56) Exchange +Input [2]: [i_item_id#32, sum#35] +Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(57) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#32, sum#35] +Keys [1]: [i_item_id#32] +Functions [1]: [sum(wr_return_quantity#29)] +Aggregate Attributes [1]: [sum(wr_return_quantity#29)#36] +Results [2]: [i_item_id#32 AS item_id#37, sum(wr_return_quantity#29)#36 AS wr_item_qty#38] + +(58) BroadcastExchange +Input [2]: [item_id#37, wr_item_qty#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#15] +Right keys [1]: [item_id#37] +Join type: Inner +Join condition: None + +(60) Project [codegen id : 18] +Output [8]: [item_id#15, sr_item_qty#16, (((cast(sr_item_qty#16 as double) / cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as double)) / 3.0) * 100.0) AS sr_dev#39, cr_item_qty#27, (((cast(cr_item_qty#27 as double) / cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as double)) / 3.0) * 100.0) AS cr_dev#40, wr_item_qty#38, (((cast(wr_item_qty#38 as double) / cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as double)) / 3.0) * 100.0) AS wr_dev#41, (cast(((sr_item_qty#16 + cr_item_qty#27) + wr_item_qty#38) as decimal(20,0)) / 3.0) AS average#42] +Input [5]: [item_id#15, sr_item_qty#16, cr_item_qty#27, item_id#37, wr_item_qty#38] + +(61) TakeOrderedAndProject +Input [8]: [item_id#15, sr_item_qty#16, sr_dev#39, cr_item_qty#27, cr_dev#40, wr_item_qty#38, wr_dev#41, average#42] +Arguments: 100, [item_id#15 ASC NULLS FIRST, sr_item_qty#16 ASC NULLS FIRST], [item_id#15, sr_item_qty#16, sr_dev#39, cr_item_qty#27, cr_dev#40, wr_item_qty#38, wr_dev#41, average#42] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..b186ed0d51 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.native_iceberg_compat/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum] [sum(sr_return_quantity),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_return_quantity,sr_returned_date_sk,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Filter [sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_week_seq] + Filter [d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date,d_week_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cr_return_quantity),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Filter [cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(wr_return_quantity),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_return_quantity,wr_returned_date_sk,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Filter [wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/explain.txt new file mode 100644 index 0000000000..3ffa871d0b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/explain.txt @@ -0,0 +1,210 @@ +== Physical Plan == +TakeOrderedAndProject (37) ++- * Project (36) + +- * BroadcastHashJoin Inner BuildLeft (35) + :- BroadcastExchange (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.customer_address (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.customer_demographics (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.household_demographics (17) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.income_band (23) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet spark_catalog.default.store_returns (31) + + +(1) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(3) Filter [codegen id : 5] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_city#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] + +(6) Filter [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [ca_address_sk#7] +Input [2]: [ca_address_sk#7, ca_city#8] + +(8) BroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#4] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] + +(11) Scan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [cd_demo_sk#9] + +(13) Filter [codegen id : 2] +Input [1]: [cd_demo_sk#9] +Condition : isnotnull(cd_demo_sk#9) + +(14) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(17) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] + +(19) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) + +(20) BroadcastExchange +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#10] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_demo_sk#10, hd_income_band_sk#11] + +(23) Scan parquet spark_catalog.default.income_band +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] + +(25) Filter [codegen id : 4] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) + +(26) Project [codegen id : 4] +Output [1]: [ib_income_band_sk#12] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] + +(27) BroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [hd_income_band_sk#11] +Right keys [1]: [ib_income_band_sk#12] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11, ib_income_band_sk#12] + +(30) BroadcastExchange +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [plan_id=5] + +(31) Scan parquet spark_catalog.default.store_returns +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_cdemo_sk)] +ReadSchema: struct + +(32) ColumnarToRow +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] + +(33) Filter +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) + +(34) Project +Output [1]: [sr_cdemo_sk#15] +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cd_demo_sk#9] +Right keys [1]: [sr_cdemo_sk#15] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(37) TakeOrderedAndProject +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c42e500847 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_datafusion/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (6) + Project [c_customer_id,c_last_name,c_first_name] + BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (5) + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [ca_address_sk] + Filter [ca_city,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [ib_income_band_sk] + Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + Project [sr_cdemo_sk] + Filter [sr_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_cdemo_sk,sr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3ffa871d0b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/explain.txt @@ -0,0 +1,210 @@ +== Physical Plan == +TakeOrderedAndProject (37) ++- * Project (36) + +- * BroadcastHashJoin Inner BuildLeft (35) + :- BroadcastExchange (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.customer_address (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.customer_demographics (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.household_demographics (17) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.income_band (23) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet spark_catalog.default.store_returns (31) + + +(1) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(3) Filter [codegen id : 5] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#7, ca_city#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] + +(6) Filter [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [ca_address_sk#7] +Input [2]: [ca_address_sk#7, ca_city#8] + +(8) BroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#4] +Right keys [1]: [ca_address_sk#7] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] + +(11) Scan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [cd_demo_sk#9] + +(13) Filter [codegen id : 2] +Input [1]: [cd_demo_sk#9] +Condition : isnotnull(cd_demo_sk#9) + +(14) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] + +(17) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] + +(19) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) + +(20) BroadcastExchange +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#10] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_demo_sk#10, hd_income_band_sk#11] + +(23) Scan parquet spark_catalog.default.income_band +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] + +(25) Filter [codegen id : 4] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) + +(26) Project [codegen id : 4] +Output [1]: [ib_income_band_sk#12] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] + +(27) BroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [hd_income_band_sk#11] +Right keys [1]: [ib_income_band_sk#12] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11, ib_income_band_sk#12] + +(30) BroadcastExchange +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [plan_id=5] + +(31) Scan parquet spark_catalog.default.store_returns +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_cdemo_sk)] +ReadSchema: struct + +(32) ColumnarToRow +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] + +(33) Filter +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) + +(34) Project +Output [1]: [sr_cdemo_sk#15] +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cd_demo_sk#9] +Right keys [1]: [sr_cdemo_sk#15] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] + +(37) TakeOrderedAndProject +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c42e500847 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.native_iceberg_compat/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen (6) + Project [c_customer_id,c_last_name,c_first_name] + BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen (5) + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [ca_address_sk] + Filter [ca_city,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [ib_income_band_sk] + Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + Project [sr_cdemo_sk] + Filter [sr_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_cdemo_sk,sr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/explain.txt new file mode 100644 index 0000000000..b534d36146 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/explain.txt @@ -0,0 +1,300 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildRight (47) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Project (28) + : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (16) + : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : :- BroadcastExchange (4) + : : : : : : : +- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : : : +- * Project (8) + : : : : : : +- * Filter (7) + : : : : : : +- * ColumnarToRow (6) + : : : : : : +- Scan parquet spark_catalog.default.web_returns (5) + : : : : : +- BroadcastExchange (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet spark_catalog.default.web_page (11) + : : : : +- BroadcastExchange (20) + : : : : +- * Filter (19) + : : : : +- * ColumnarToRow (18) + : : : : +- Scan parquet spark_catalog.default.customer_demographics (17) + : : : +- BroadcastExchange (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.customer_demographics (23) + : : +- BroadcastExchange (33) + : : +- * Project (32) + : : +- * Filter (31) + : : +- * ColumnarToRow (30) + : : +- Scan parquet spark_catalog.default.customer_address (29) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet spark_catalog.default.date_dim (36) + +- BroadcastExchange (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet spark_catalog.default.reason (43) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#7)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((((isnotnull(ws_item_sk#1) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_web_page_sk#2)) AND ((((ws_sales_price#5 >= 100.00) AND (ws_sales_price#5 <= 150.00)) OR ((ws_sales_price#5 >= 50.00) AND (ws_sales_price#5 <= 100.00))) OR ((ws_sales_price#5 >= 150.00) AND (ws_sales_price#5 <= 200.00)))) AND ((((ws_net_profit#6 >= 100.00) AND (ws_net_profit#6 <= 200.00)) OR ((ws_net_profit#6 >= 150.00) AND (ws_net_profit#6 <= 300.00))) OR ((ws_net_profit#6 >= 50.00) AND (ws_net_profit#6 <= 250.00)))) + +(4) BroadcastExchange +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[2, int, false] as bigint) & 4294967295))),false), [plan_id=1] + +(5) Scan parquet spark_catalog.default.web_returns +Output [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] +ReadSchema: struct + +(6) ColumnarToRow +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] + +(7) Filter +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(8) Project +Output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [ws_item_sk#1, ws_order_number#3] +Right keys [2]: [wr_item_sk#8, wr_order_number#13] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(11) Scan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [wp_web_page_sk#17] + +(13) Filter [codegen id : 2] +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(14) BroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_web_page_sk#2] +Right keys [1]: [wp_web_page_sk#17] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 8] +Output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] + +(17) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(19) Filter [codegen id : 3] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : (((isnotnull(cd_demo_sk#18) AND isnotnull(cd_marital_status#19)) AND isnotnull(cd_education_status#20)) AND ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) OR ((cd_marital_status#19 = S) AND (cd_education_status#20 = College ))) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )))) + +(20) BroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_cdemo_sk#9] +Right keys [1]: [cd_demo_sk#18] +Join type: Inner +Join condition: ((((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#19 = S) AND (cd_education_status#20 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))) + +(22) Project [codegen id : 8] +Output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(23) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(25) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Condition : ((isnotnull(cd_demo_sk#21) AND isnotnull(cd_marital_status#22)) AND isnotnull(cd_education_status#23)) + +(26) BroadcastExchange +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], input[1, string, false], input[2, string, false]),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [wr_returning_cdemo_sk#11, cd_marital_status#19, cd_education_status#20] +Right keys [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 8] +Output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20, cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(29) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,NJ,OH]),In(ca_state, [CT,KY,WI])),In(ca_state, [AR,IA,LA]))] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] + +(31) Filter [codegen id : 5] +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Condition : (((isnotnull(ca_country#26) AND (ca_country#26 = United States)) AND isnotnull(ca_address_sk#24)) AND ((ca_state#25 IN (IN,OH,NJ) OR ca_state#25 IN (WI,CT,KY)) OR ca_state#25 IN (LA,IA,AR))) + +(32) Project [codegen id : 5] +Output [2]: [ca_address_sk#24, ca_state#25] +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] + +(33) BroadcastExchange +Input [2]: [ca_address_sk#24, ca_state#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_addr_sk#10] +Right keys [1]: [ca_address_sk#24] +Join type: Inner +Join condition: ((((ca_state#25 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#25 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#25 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))) + +(35) Project [codegen id : 8] +Output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#24, ca_state#25] + +(36) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_year#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#27, d_year#28] + +(38) Filter [codegen id : 6] +Input [2]: [d_date_sk#27, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(39) Project [codegen id : 6] +Output [1]: [d_date_sk#27] +Input [2]: [d_date_sk#27, d_year#28] + +(40) BroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#7] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#27] + +(43) Scan parquet spark_catalog.default.reason +Output [2]: [r_reason_sk#29, r_reason_desc#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [r_reason_sk#29, r_reason_desc#30] + +(45) Filter [codegen id : 7] +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) + +(46) BroadcastExchange +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_reason_sk#12] +Right keys [1]: [r_reason_sk#29] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 8] +Output [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Input [6]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#29, r_reason_desc#30] + +(49) HashAggregate [codegen id : 8] +Input [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [6]: [sum#31, count#32, sum#33, count#34, sum#35, count#36] +Results [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(50) Exchange +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 9] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [3]: [avg(ws_quantity#4)#43, avg(UnscaledValue(wr_refunded_cash#15))#44, avg(UnscaledValue(wr_fee#14))#45] +Results [4]: [substr(r_reason_desc#30, 1, 20) AS substr(r_reason_desc, 1, 20)#46, avg(ws_quantity#4)#43 AS avg(ws_quantity)#47, cast((avg(UnscaledValue(wr_refunded_cash#15))#44 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#48, cast((avg(UnscaledValue(wr_fee#14))#45 / 100.0) as decimal(11,6)) AS avg(wr_fee)#49] + +(52) TakeOrderedAndProject +Input [4]: [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] +Arguments: 100, [substr(r_reason_desc, 1, 20)#46 ASC NULLS FIRST, avg(ws_quantity)#47 ASC NULLS FIRST, avg(wr_refunded_cash)#48 ASC NULLS FIRST, avg(wr_fee)#49 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c5a45b42cb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] + WholeStageCodegen (9) + HashAggregate [r_reason_desc,sum,count,sum,count,sum,count] [avg(ws_quantity),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),sum,count,sum,count,sum,count] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen (8) + HashAggregate [r_reason_desc,ws_quantity,wr_refunded_cash,wr_fee] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + BroadcastHashJoin [wr_reason_sk,r_reason_sk] + Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] + Project [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [wr_returning_cdemo_sk,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] + Project [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] + BroadcastHashJoin [wr_refunded_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ws_sales_price] + Project [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sales_price,ws_net_profit] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + Project [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.reason [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b534d36146 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/explain.txt @@ -0,0 +1,300 @@ +== Physical Plan == +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildRight (47) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Project (28) + : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (16) + : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : :- BroadcastExchange (4) + : : : : : : : +- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : : : +- * Project (8) + : : : : : : +- * Filter (7) + : : : : : : +- * ColumnarToRow (6) + : : : : : : +- Scan parquet spark_catalog.default.web_returns (5) + : : : : : +- BroadcastExchange (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet spark_catalog.default.web_page (11) + : : : : +- BroadcastExchange (20) + : : : : +- * Filter (19) + : : : : +- * ColumnarToRow (18) + : : : : +- Scan parquet spark_catalog.default.customer_demographics (17) + : : : +- BroadcastExchange (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet spark_catalog.default.customer_demographics (23) + : : +- BroadcastExchange (33) + : : +- * Project (32) + : : +- * Filter (31) + : : +- * ColumnarToRow (30) + : : +- Scan parquet spark_catalog.default.customer_address (29) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet spark_catalog.default.date_dim (36) + +- BroadcastExchange (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet spark_catalog.default.reason (43) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#7)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((((isnotnull(ws_item_sk#1) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_web_page_sk#2)) AND ((((ws_sales_price#5 >= 100.00) AND (ws_sales_price#5 <= 150.00)) OR ((ws_sales_price#5 >= 50.00) AND (ws_sales_price#5 <= 100.00))) OR ((ws_sales_price#5 >= 150.00) AND (ws_sales_price#5 <= 200.00)))) AND ((((ws_net_profit#6 >= 100.00) AND (ws_net_profit#6 <= 200.00)) OR ((ws_net_profit#6 >= 150.00) AND (ws_net_profit#6 <= 300.00))) OR ((ws_net_profit#6 >= 50.00) AND (ws_net_profit#6 <= 250.00)))) + +(4) BroadcastExchange +Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[2, int, false] as bigint) & 4294967295))),false), [plan_id=1] + +(5) Scan parquet spark_catalog.default.web_returns +Output [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] +ReadSchema: struct + +(6) ColumnarToRow +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] + +(7) Filter +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(8) Project +Output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Input [9]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15, wr_returned_date_sk#16] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [ws_item_sk#1, ws_order_number#3] +Right keys [2]: [wr_item_sk#8, wr_order_number#13] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(11) Scan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [wp_web_page_sk#17] + +(13) Filter [codegen id : 2] +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(14) BroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_web_page_sk#2] +Right keys [1]: [wp_web_page_sk#17] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 8] +Output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] + +(17) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(19) Filter [codegen id : 3] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : (((isnotnull(cd_demo_sk#18) AND isnotnull(cd_marital_status#19)) AND isnotnull(cd_education_status#20)) AND ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) OR ((cd_marital_status#19 = S) AND (cd_education_status#20 = College ))) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )))) + +(20) BroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_cdemo_sk#9] +Right keys [1]: [cd_demo_sk#18] +Join type: Inner +Join condition: ((((((cd_marital_status#19 = M) AND (cd_education_status#20 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#19 = S) AND (cd_education_status#20 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#19 = W) AND (cd_education_status#20 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))) + +(22) Project [codegen id : 8] +Output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20] +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(23) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(25) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Condition : ((isnotnull(cd_demo_sk#21) AND isnotnull(cd_marital_status#22)) AND isnotnull(cd_education_status#23)) + +(26) BroadcastExchange +Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], input[1, string, false], input[2, string, false]),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [wr_returning_cdemo_sk#11, cd_marital_status#19, cd_education_status#20] +Right keys [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 8] +Output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#19, cd_education_status#20, cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + +(29) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,NJ,OH]),In(ca_state, [CT,KY,WI])),In(ca_state, [AR,IA,LA]))] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] + +(31) Filter [codegen id : 5] +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] +Condition : (((isnotnull(ca_country#26) AND (ca_country#26 = United States)) AND isnotnull(ca_address_sk#24)) AND ((ca_state#25 IN (IN,OH,NJ) OR ca_state#25 IN (WI,CT,KY)) OR ca_state#25 IN (LA,IA,AR))) + +(32) Project [codegen id : 5] +Output [2]: [ca_address_sk#24, ca_state#25] +Input [3]: [ca_address_sk#24, ca_state#25, ca_country#26] + +(33) BroadcastExchange +Input [2]: [ca_address_sk#24, ca_state#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_addr_sk#10] +Right keys [1]: [ca_address_sk#24] +Join type: Inner +Join condition: ((((ca_state#25 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#25 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#25 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))) + +(35) Project [codegen id : 8] +Output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#24, ca_state#25] + +(36) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_year#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#27, d_year#28] + +(38) Filter [codegen id : 6] +Input [2]: [d_date_sk#27, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2000)) AND isnotnull(d_date_sk#27)) + +(39) Project [codegen id : 6] +Output [1]: [d_date_sk#27] +Input [2]: [d_date_sk#27, d_year#28] + +(40) BroadcastExchange +Input [1]: [d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#7] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#27] + +(43) Scan parquet spark_catalog.default.reason +Output [2]: [r_reason_sk#29, r_reason_desc#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [r_reason_sk#29, r_reason_desc#30] + +(45) Filter [codegen id : 7] +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) + +(46) BroadcastExchange +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_reason_sk#12] +Right keys [1]: [r_reason_sk#29] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 8] +Output [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Input [6]: [ws_quantity#4, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#29, r_reason_desc#30] + +(49) HashAggregate [codegen id : 8] +Input [4]: [ws_quantity#4, wr_fee#14, wr_refunded_cash#15, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [6]: [sum#31, count#32, sum#33, count#34, sum#35, count#36] +Results [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] + +(50) Exchange +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 9] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [3]: [avg(ws_quantity#4)#43, avg(UnscaledValue(wr_refunded_cash#15))#44, avg(UnscaledValue(wr_fee#14))#45] +Results [4]: [substr(r_reason_desc#30, 1, 20) AS substr(r_reason_desc, 1, 20)#46, avg(ws_quantity#4)#43 AS avg(ws_quantity)#47, cast((avg(UnscaledValue(wr_refunded_cash#15))#44 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#48, cast((avg(UnscaledValue(wr_fee#14))#45 / 100.0) as decimal(11,6)) AS avg(wr_fee)#49] + +(52) TakeOrderedAndProject +Input [4]: [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] +Arguments: 100, [substr(r_reason_desc, 1, 20)#46 ASC NULLS FIRST, avg(ws_quantity)#47 ASC NULLS FIRST, avg(wr_refunded_cash)#48 ASC NULLS FIRST, avg(wr_fee)#49 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c5a45b42cb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] + WholeStageCodegen (9) + HashAggregate [r_reason_desc,sum,count,sum,count,sum,count] [avg(ws_quantity),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),sum,count,sum,count,sum,count] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen (8) + HashAggregate [r_reason_desc,ws_quantity,wr_refunded_cash,wr_fee] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + BroadcastHashJoin [wr_reason_sk,r_reason_sk] + Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_sold_date_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] + Project [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [wr_returning_cdemo_sk,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] + Project [ws_quantity,ws_net_profit,ws_sold_date_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] + BroadcastHashJoin [wr_refunded_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ws_sales_price] + Project [ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sales_price,ws_net_profit] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit,ws_sold_date_sk] + Project [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash,wr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.reason [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/explain.txt new file mode 100644 index 0000000000..caf90212ac --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/explain.txt @@ -0,0 +1,145 @@ +== Physical Plan == +TakeOrderedAndProject (25) ++- * Project (24) + +- Window (23) + +- * Sort (22) + +- Exchange (21) + +- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Expand (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.date_dim (4) + +- BroadcastExchange (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#1, ws_net_paid#2] +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(14) BroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#2, i_category#8, i_class#7] +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] + +(17) Expand [codegen id : 3] +Input [3]: [ws_net_paid#2, i_category#8, i_class#7] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] + +(18) HashAggregate [codegen id : 3] +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum#12] +Results [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] + +(19) Exchange +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#14] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS total_sum#15, i_category#9, i_class#10, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS lochierarchy#16, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS _w0#17, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS _w1#18, CASE WHEN (cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint) = 0) THEN i_category#9 END AS _w2#19] + +(21) Exchange +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: hashpartitioning(_w1#18, _w2#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(22) Sort [codegen id : 5] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: [_w1#18 ASC NULLS FIRST, _w2#19 ASC NULLS FIRST, _w0#17 DESC NULLS LAST], false, 0 + +(23) Window +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: [rank(_w0#17) windowspecdefinition(_w1#18, _w2#19, _w0#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#20], [_w1#18, _w2#19], [_w0#17 DESC NULLS LAST] + +(24) Project [codegen id : 6] +Output [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Input [8]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19, rank_within_parent#20] + +(25) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Arguments: 100, [lochierarchy#16 DESC NULLS LAST, CASE WHEN (lochierarchy#16 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#20 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8ae1bf14f1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_datafusion/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (6) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (5) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] + Expand [ws_net_paid,i_category,i_class] + Project [ws_net_paid,i_category,i_class] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..caf90212ac --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/explain.txt @@ -0,0 +1,145 @@ +== Physical Plan == +TakeOrderedAndProject (25) ++- * Project (24) + +- Window (23) + +- * Sort (22) + +- Exchange (21) + +- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Expand (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.date_dim (4) + +- BroadcastExchange (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet spark_catalog.default.item (11) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#1, ws_net_paid#2] +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(14) BroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#2, i_category#8, i_class#7] +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] + +(17) Expand [codegen id : 3] +Input [3]: [ws_net_paid#2, i_category#8, i_class#7] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] + +(18) HashAggregate [codegen id : 3] +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum#12] +Results [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] + +(19) Exchange +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(20) HashAggregate [codegen id : 4] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#14] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS total_sum#15, i_category#9, i_class#10, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS lochierarchy#16, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS _w0#17, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS _w1#18, CASE WHEN (cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint) = 0) THEN i_category#9 END AS _w2#19] + +(21) Exchange +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: hashpartitioning(_w1#18, _w2#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(22) Sort [codegen id : 5] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: [_w1#18 ASC NULLS FIRST, _w2#19 ASC NULLS FIRST, _w0#17 DESC NULLS LAST], false, 0 + +(23) Window +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19] +Arguments: [rank(_w0#17) windowspecdefinition(_w1#18, _w2#19, _w0#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#20], [_w1#18, _w2#19], [_w0#17 DESC NULLS LAST] + +(24) Project [codegen id : 6] +Output [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Input [8]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w0#17, _w1#18, _w2#19, rank_within_parent#20] + +(25) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Arguments: 100, [lochierarchy#16 DESC NULLS LAST, CASE WHEN (lochierarchy#16 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#20 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8ae1bf14f1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.native_iceberg_compat/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (6) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [_w0,_w1,_w2] + WholeStageCodegen (5) + Sort [_w1,_w2,_w0] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,lochierarchy,_w0,_w1,_w2,sum] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] + Expand [ws_net_paid,i_category,i_class] + Project [ws_net_paid,i_category,i_class] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/explain.txt new file mode 100644 index 0000000000..3fb94a8661 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin LeftAnti BuildRight (47) + :- * BroadcastHashJoin LeftAnti BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + +- BroadcastExchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (37) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#2)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] + +(3) Filter [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#2] +Right keys [1]: [d_date_sk#3] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_customer_sk#1, d_date#4] +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) Exchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] + +(22) Filter [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#11, d_date#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] + +(26) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] + +(29) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(30) Exchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(32) BroadcastExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] +Join type: LeftAnti +Join condition: None + +(34) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#17)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] + +(36) Filter [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) + +(37) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#18, d_date#19] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] + +(40) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] + +(43) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(44) Exchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(46) BroadcastExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] +Join type: LeftAnti +Join condition: None + +(48) Project [codegen id : 12] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(50) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 13] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9122ac943b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3fb94a8661 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/explain.txt @@ -0,0 +1,307 @@ +== Physical Plan == +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin LeftAnti BuildRight (47) + :- * BroadcastHashJoin LeftAnti BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.customer (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + +- BroadcastExchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (37) + +- ReusedExchange (40) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#2)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] + +(3) Filter [codegen id : 3] +Input [2]: [ss_customer_sk#1, ss_sold_date_sk#2] +Condition : isnotnull(ss_customer_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#2] +Right keys [1]: [d_date_sk#3] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_customer_sk#1, d_date#4] +Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [c_last_name#8, c_first_name#7, d_date#4] +Input [5]: [ss_customer_sk#1, d_date#4, c_customer_sk#6, c_first_name#7, c_last_name#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(18) Exchange +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#4, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +PushedFilters: [IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] + +(22) Filter [codegen id : 6] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#11, d_date#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] + +(26) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] + +(29) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(30) Exchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] + +(32) BroadcastExchange +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] +Join type: LeftAnti +Join condition: None + +(34) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#17)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] + +(36) Filter [codegen id : 10] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) + +(37) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#18, d_date#19] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] + +(40) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] + +(43) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(44) Exchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] + +(46) BroadcastExchange +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 12] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#4, 1970-01-01), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] +Join type: LeftAnti +Join condition: None + +(48) Project [codegen id : 12] +Output: [] +Input [3]: [c_last_name#8, c_first_name#7, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(50) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 13] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9122ac943b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + Project + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/explain.txt new file mode 100644 index 0000000000..be540f124f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/explain.txt @@ -0,0 +1,1031 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (182) +:- * BroadcastNestedLoopJoin Inner BuildRight (160) +: :- * BroadcastNestedLoopJoin Inner BuildRight (138) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (116) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (94) +: : : : :- * BroadcastNestedLoopJoin Inner BuildRight (72) +: : : : : :- * BroadcastNestedLoopJoin Inner BuildRight (50) +: : : : : : :- * HashAggregate (28) +: : : : : : : +- Exchange (27) +: : : : : : : +- * HashAggregate (26) +: : : : : : : +- * Project (25) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (24) +: : : : : : : :- * Project (18) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) +: : : : : : : : :- * Project (11) +: : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (10) +: : : : : : : : : :- * Project (4) +: : : : : : : : : : +- * Filter (3) +: : : : : : : : : : +- * ColumnarToRow (2) +: : : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) +: : : : : : : : : +- BroadcastExchange (9) +: : : : : : : : : +- * Project (8) +: : : : : : : : : +- * Filter (7) +: : : : : : : : : +- * ColumnarToRow (6) +: : : : : : : : : +- Scan parquet spark_catalog.default.household_demographics (5) +: : : : : : : : +- BroadcastExchange (16) +: : : : : : : : +- * Project (15) +: : : : : : : : +- * Filter (14) +: : : : : : : : +- * ColumnarToRow (13) +: : : : : : : : +- Scan parquet spark_catalog.default.time_dim (12) +: : : : : : : +- BroadcastExchange (23) +: : : : : : : +- * Project (22) +: : : : : : : +- * Filter (21) +: : : : : : : +- * ColumnarToRow (20) +: : : : : : : +- Scan parquet spark_catalog.default.store (19) +: : : : : : +- BroadcastExchange (49) +: : : : : : +- * HashAggregate (48) +: : : : : : +- Exchange (47) +: : : : : : +- * HashAggregate (46) +: : : : : : +- * Project (45) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (44) +: : : : : : :- * Project (42) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (41) +: : : : : : : :- * Project (35) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (34) +: : : : : : : : :- * Project (32) +: : : : : : : : : +- * Filter (31) +: : : : : : : : : +- * ColumnarToRow (30) +: : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (29) +: : : : : : : : +- ReusedExchange (33) +: : : : : : : +- BroadcastExchange (40) +: : : : : : : +- * Project (39) +: : : : : : : +- * Filter (38) +: : : : : : : +- * ColumnarToRow (37) +: : : : : : : +- Scan parquet spark_catalog.default.time_dim (36) +: : : : : : +- ReusedExchange (43) +: : : : : +- BroadcastExchange (71) +: : : : : +- * HashAggregate (70) +: : : : : +- Exchange (69) +: : : : : +- * HashAggregate (68) +: : : : : +- * Project (67) +: : : : : +- * BroadcastHashJoin Inner BuildRight (66) +: : : : : :- * Project (64) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (63) +: : : : : : :- * Project (57) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (56) +: : : : : : : :- * Project (54) +: : : : : : : : +- * Filter (53) +: : : : : : : : +- * ColumnarToRow (52) +: : : : : : : : +- Scan parquet spark_catalog.default.store_sales (51) +: : : : : : : +- ReusedExchange (55) +: : : : : : +- BroadcastExchange (62) +: : : : : : +- * Project (61) +: : : : : : +- * Filter (60) +: : : : : : +- * ColumnarToRow (59) +: : : : : : +- Scan parquet spark_catalog.default.time_dim (58) +: : : : : +- ReusedExchange (65) +: : : : +- BroadcastExchange (93) +: : : : +- * HashAggregate (92) +: : : : +- Exchange (91) +: : : : +- * HashAggregate (90) +: : : : +- * Project (89) +: : : : +- * BroadcastHashJoin Inner BuildRight (88) +: : : : :- * Project (86) +: : : : : +- * BroadcastHashJoin Inner BuildRight (85) +: : : : : :- * Project (79) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (78) +: : : : : : :- * Project (76) +: : : : : : : +- * Filter (75) +: : : : : : : +- * ColumnarToRow (74) +: : : : : : : +- Scan parquet spark_catalog.default.store_sales (73) +: : : : : : +- ReusedExchange (77) +: : : : : +- BroadcastExchange (84) +: : : : : +- * Project (83) +: : : : : +- * Filter (82) +: : : : : +- * ColumnarToRow (81) +: : : : : +- Scan parquet spark_catalog.default.time_dim (80) +: : : : +- ReusedExchange (87) +: : : +- BroadcastExchange (115) +: : : +- * HashAggregate (114) +: : : +- Exchange (113) +: : : +- * HashAggregate (112) +: : : +- * Project (111) +: : : +- * BroadcastHashJoin Inner BuildRight (110) +: : : :- * Project (108) +: : : : +- * BroadcastHashJoin Inner BuildRight (107) +: : : : :- * Project (101) +: : : : : +- * BroadcastHashJoin Inner BuildRight (100) +: : : : : :- * Project (98) +: : : : : : +- * Filter (97) +: : : : : : +- * ColumnarToRow (96) +: : : : : : +- Scan parquet spark_catalog.default.store_sales (95) +: : : : : +- ReusedExchange (99) +: : : : +- BroadcastExchange (106) +: : : : +- * Project (105) +: : : : +- * Filter (104) +: : : : +- * ColumnarToRow (103) +: : : : +- Scan parquet spark_catalog.default.time_dim (102) +: : : +- ReusedExchange (109) +: : +- BroadcastExchange (137) +: : +- * HashAggregate (136) +: : +- Exchange (135) +: : +- * HashAggregate (134) +: : +- * Project (133) +: : +- * BroadcastHashJoin Inner BuildRight (132) +: : :- * Project (130) +: : : +- * BroadcastHashJoin Inner BuildRight (129) +: : : :- * Project (123) +: : : : +- * BroadcastHashJoin Inner BuildRight (122) +: : : : :- * Project (120) +: : : : : +- * Filter (119) +: : : : : +- * ColumnarToRow (118) +: : : : : +- Scan parquet spark_catalog.default.store_sales (117) +: : : : +- ReusedExchange (121) +: : : +- BroadcastExchange (128) +: : : +- * Project (127) +: : : +- * Filter (126) +: : : +- * ColumnarToRow (125) +: : : +- Scan parquet spark_catalog.default.time_dim (124) +: : +- ReusedExchange (131) +: +- BroadcastExchange (159) +: +- * HashAggregate (158) +: +- Exchange (157) +: +- * HashAggregate (156) +: +- * Project (155) +: +- * BroadcastHashJoin Inner BuildRight (154) +: :- * Project (152) +: : +- * BroadcastHashJoin Inner BuildRight (151) +: : :- * Project (145) +: : : +- * BroadcastHashJoin Inner BuildRight (144) +: : : :- * Project (142) +: : : : +- * Filter (141) +: : : : +- * ColumnarToRow (140) +: : : : +- Scan parquet spark_catalog.default.store_sales (139) +: : : +- ReusedExchange (143) +: : +- BroadcastExchange (150) +: : +- * Project (149) +: : +- * Filter (148) +: : +- * ColumnarToRow (147) +: : +- Scan parquet spark_catalog.default.time_dim (146) +: +- ReusedExchange (153) ++- BroadcastExchange (181) + +- * HashAggregate (180) + +- Exchange (179) + +- * HashAggregate (178) + +- * Project (177) + +- * BroadcastHashJoin Inner BuildRight (176) + :- * Project (174) + : +- * BroadcastHashJoin Inner BuildRight (173) + : :- * Project (167) + : : +- * BroadcastHashJoin Inner BuildRight (166) + : : :- * Project (164) + : : : +- * Filter (163) + : : : +- * ColumnarToRow (162) + : : : +- Scan parquet spark_catalog.default.store_sales (161) + : : +- ReusedExchange (165) + : +- BroadcastExchange (172) + : +- * Project (171) + : +- * Filter (170) + : +- * ColumnarToRow (169) + : +- Scan parquet spark_catalog.default.time_dim (168) + +- ReusedExchange (175) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Project [codegen id : 4] +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] + +(7) Filter [codegen id : 1] +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Condition : (((((hd_dep_count#6 = 4) AND (hd_vehicle_count#7 <= 6)) OR ((hd_dep_count#6 = 2) AND (hd_vehicle_count#7 <= 4))) OR ((hd_dep_count#6 = 0) AND (hd_vehicle_count#7 <= 2))) AND isnotnull(hd_demo_sk#5)) + +(8) Project [codegen id : 1] +Output [1]: [hd_demo_sk#5] +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] + +(9) BroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#5] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] + +(12) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(14) Filter [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(15) Project [codegen id : 2] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(16) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(19) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] + +(21) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(22) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_store_name#12] + +(23) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(26) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] + +(27) Exchange +Input [1]: [count#14] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 40] +Input [1]: [count#14] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#15] +Results [1]: [count(1)#15 AS h8_30_to_9#16] + +(29) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] + +(31) Filter [codegen id : 8] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_hdemo_sk#18) AND isnotnull(ss_sold_time_sk#17)) AND isnotnull(ss_store_sk#19)) + +(32) Project [codegen id : 8] +Output [3]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] + +(33) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#21] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_hdemo_sk#18] +Right keys [1]: [hd_demo_sk#21] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 8] +Output [2]: [ss_sold_time_sk#17, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, hd_demo_sk#21] + +(36) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#22, t_hour#23, t_minute#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] + +(38) Filter [codegen id : 6] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] +Condition : ((((isnotnull(t_hour#23) AND isnotnull(t_minute#24)) AND (t_hour#23 = 9)) AND (t_minute#24 < 30)) AND isnotnull(t_time_sk#22)) + +(39) Project [codegen id : 6] +Output [1]: [t_time_sk#22] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] + +(40) BroadcastExchange +Input [1]: [t_time_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_time_sk#17] +Right keys [1]: [t_time_sk#22] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [1]: [ss_store_sk#19] +Input [3]: [ss_sold_time_sk#17, ss_store_sk#19, t_time_sk#22] + +(43) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#25] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#19] +Right keys [1]: [s_store_sk#25] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 8] +Output: [] +Input [2]: [ss_store_sk#19, s_store_sk#25] + +(46) HashAggregate [codegen id : 8] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#26] +Results [1]: [count#27] + +(47) Exchange +Input [1]: [count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(48) HashAggregate [codegen id : 9] +Input [1]: [count#27] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#28] +Results [1]: [count(1)#28 AS h9_to_9_30#29] + +(49) BroadcastExchange +Input [1]: [h9_to_9_30#29] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(50) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(51) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 13] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] + +(53) Filter [codegen id : 13] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] +Condition : ((isnotnull(ss_hdemo_sk#31) AND isnotnull(ss_sold_time_sk#30)) AND isnotnull(ss_store_sk#32)) + +(54) Project [codegen id : 13] +Output [3]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] + +(55) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#34] + +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_hdemo_sk#31] +Right keys [1]: [hd_demo_sk#34] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 13] +Output [2]: [ss_sold_time_sk#30, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, hd_demo_sk#34] + +(58) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#35, t_hour#36, t_minute#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 11] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] + +(60) Filter [codegen id : 11] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] +Condition : ((((isnotnull(t_hour#36) AND isnotnull(t_minute#37)) AND (t_hour#36 = 9)) AND (t_minute#37 >= 30)) AND isnotnull(t_time_sk#35)) + +(61) Project [codegen id : 11] +Output [1]: [t_time_sk#35] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] + +(62) BroadcastExchange +Input [1]: [t_time_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(63) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_time_sk#30] +Right keys [1]: [t_time_sk#35] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 13] +Output [1]: [ss_store_sk#32] +Input [3]: [ss_sold_time_sk#30, ss_store_sk#32, t_time_sk#35] + +(65) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#38] + +(66) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_store_sk#32] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 13] +Output: [] +Input [2]: [ss_store_sk#32, s_store_sk#38] + +(68) HashAggregate [codegen id : 13] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#39] +Results [1]: [count#40] + +(69) Exchange +Input [1]: [count#40] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] + +(70) HashAggregate [codegen id : 14] +Input [1]: [count#40] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS h9_30_to_10#42] + +(71) BroadcastExchange +Input [1]: [h9_30_to_10#42] +Arguments: IdentityBroadcastMode, [plan_id=10] + +(72) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(73) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 18] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] + +(75) Filter [codegen id : 18] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] +Condition : ((isnotnull(ss_hdemo_sk#44) AND isnotnull(ss_sold_time_sk#43)) AND isnotnull(ss_store_sk#45)) + +(76) Project [codegen id : 18] +Output [3]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] + +(77) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#47] + +(78) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_hdemo_sk#44] +Right keys [1]: [hd_demo_sk#47] +Join type: Inner +Join condition: None + +(79) Project [codegen id : 18] +Output [2]: [ss_sold_time_sk#43, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, hd_demo_sk#47] + +(80) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#48, t_hour#49, t_minute#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 16] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] + +(82) Filter [codegen id : 16] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] +Condition : ((((isnotnull(t_hour#49) AND isnotnull(t_minute#50)) AND (t_hour#49 = 10)) AND (t_minute#50 < 30)) AND isnotnull(t_time_sk#48)) + +(83) Project [codegen id : 16] +Output [1]: [t_time_sk#48] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] + +(84) BroadcastExchange +Input [1]: [t_time_sk#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] + +(85) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_sold_time_sk#43] +Right keys [1]: [t_time_sk#48] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 18] +Output [1]: [ss_store_sk#45] +Input [3]: [ss_sold_time_sk#43, ss_store_sk#45, t_time_sk#48] + +(87) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#51] + +(88) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_store_sk#45] +Right keys [1]: [s_store_sk#51] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 18] +Output: [] +Input [2]: [ss_store_sk#45, s_store_sk#51] + +(90) HashAggregate [codegen id : 18] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#52] +Results [1]: [count#53] + +(91) Exchange +Input [1]: [count#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(92) HashAggregate [codegen id : 19] +Input [1]: [count#53] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#54] +Results [1]: [count(1)#54 AS h10_to_10_30#55] + +(93) BroadcastExchange +Input [1]: [h10_to_10_30#55] +Arguments: IdentityBroadcastMode, [plan_id=13] + +(94) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(95) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 23] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] + +(97) Filter [codegen id : 23] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] +Condition : ((isnotnull(ss_hdemo_sk#57) AND isnotnull(ss_sold_time_sk#56)) AND isnotnull(ss_store_sk#58)) + +(98) Project [codegen id : 23] +Output [3]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] + +(99) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#60] + +(100) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ss_hdemo_sk#57] +Right keys [1]: [hd_demo_sk#60] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 23] +Output [2]: [ss_sold_time_sk#56, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, hd_demo_sk#60] + +(102) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#61, t_hour#62, t_minute#63] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 21] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] + +(104) Filter [codegen id : 21] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] +Condition : ((((isnotnull(t_hour#62) AND isnotnull(t_minute#63)) AND (t_hour#62 = 10)) AND (t_minute#63 >= 30)) AND isnotnull(t_time_sk#61)) + +(105) Project [codegen id : 21] +Output [1]: [t_time_sk#61] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] + +(106) BroadcastExchange +Input [1]: [t_time_sk#61] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] + +(107) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ss_sold_time_sk#56] +Right keys [1]: [t_time_sk#61] +Join type: Inner +Join condition: None + +(108) Project [codegen id : 23] +Output [1]: [ss_store_sk#58] +Input [3]: [ss_sold_time_sk#56, ss_store_sk#58, t_time_sk#61] + +(109) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#64] + +(110) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ss_store_sk#58] +Right keys [1]: [s_store_sk#64] +Join type: Inner +Join condition: None + +(111) Project [codegen id : 23] +Output: [] +Input [2]: [ss_store_sk#58, s_store_sk#64] + +(112) HashAggregate [codegen id : 23] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#65] +Results [1]: [count#66] + +(113) Exchange +Input [1]: [count#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(114) HashAggregate [codegen id : 24] +Input [1]: [count#66] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#67] +Results [1]: [count(1)#67 AS h10_30_to_11#68] + +(115) BroadcastExchange +Input [1]: [h10_30_to_11#68] +Arguments: IdentityBroadcastMode, [plan_id=16] + +(116) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(117) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 28] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] + +(119) Filter [codegen id : 28] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Condition : ((isnotnull(ss_hdemo_sk#70) AND isnotnull(ss_sold_time_sk#69)) AND isnotnull(ss_store_sk#71)) + +(120) Project [codegen id : 28] +Output [3]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] + +(121) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#73] + +(122) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ss_hdemo_sk#70] +Right keys [1]: [hd_demo_sk#73] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 28] +Output [2]: [ss_sold_time_sk#69, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, hd_demo_sk#73] + +(124) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 26] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] + +(126) Filter [codegen id : 26] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Condition : ((((isnotnull(t_hour#75) AND isnotnull(t_minute#76)) AND (t_hour#75 = 11)) AND (t_minute#76 < 30)) AND isnotnull(t_time_sk#74)) + +(127) Project [codegen id : 26] +Output [1]: [t_time_sk#74] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] + +(128) BroadcastExchange +Input [1]: [t_time_sk#74] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=17] + +(129) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ss_sold_time_sk#69] +Right keys [1]: [t_time_sk#74] +Join type: Inner +Join condition: None + +(130) Project [codegen id : 28] +Output [1]: [ss_store_sk#71] +Input [3]: [ss_sold_time_sk#69, ss_store_sk#71, t_time_sk#74] + +(131) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#77] + +(132) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ss_store_sk#71] +Right keys [1]: [s_store_sk#77] +Join type: Inner +Join condition: None + +(133) Project [codegen id : 28] +Output: [] +Input [2]: [ss_store_sk#71, s_store_sk#77] + +(134) HashAggregate [codegen id : 28] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] + +(135) Exchange +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] + +(136) HashAggregate [codegen id : 29] +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#80] +Results [1]: [count(1)#80 AS h11_to_11_30#81] + +(137) BroadcastExchange +Input [1]: [h11_to_11_30#81] +Arguments: IdentityBroadcastMode, [plan_id=19] + +(138) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(139) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(140) ColumnarToRow [codegen id : 33] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] + +(141) Filter [codegen id : 33] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] +Condition : ((isnotnull(ss_hdemo_sk#83) AND isnotnull(ss_sold_time_sk#82)) AND isnotnull(ss_store_sk#84)) + +(142) Project [codegen id : 33] +Output [3]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] + +(143) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#86] + +(144) BroadcastHashJoin [codegen id : 33] +Left keys [1]: [ss_hdemo_sk#83] +Right keys [1]: [hd_demo_sk#86] +Join type: Inner +Join condition: None + +(145) Project [codegen id : 33] +Output [2]: [ss_sold_time_sk#82, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, hd_demo_sk#86] + +(146) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#87, t_hour#88, t_minute#89] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(147) ColumnarToRow [codegen id : 31] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] + +(148) Filter [codegen id : 31] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] +Condition : ((((isnotnull(t_hour#88) AND isnotnull(t_minute#89)) AND (t_hour#88 = 11)) AND (t_minute#89 >= 30)) AND isnotnull(t_time_sk#87)) + +(149) Project [codegen id : 31] +Output [1]: [t_time_sk#87] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] + +(150) BroadcastExchange +Input [1]: [t_time_sk#87] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] + +(151) BroadcastHashJoin [codegen id : 33] +Left keys [1]: [ss_sold_time_sk#82] +Right keys [1]: [t_time_sk#87] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 33] +Output [1]: [ss_store_sk#84] +Input [3]: [ss_sold_time_sk#82, ss_store_sk#84, t_time_sk#87] + +(153) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#90] + +(154) BroadcastHashJoin [codegen id : 33] +Left keys [1]: [ss_store_sk#84] +Right keys [1]: [s_store_sk#90] +Join type: Inner +Join condition: None + +(155) Project [codegen id : 33] +Output: [] +Input [2]: [ss_store_sk#84, s_store_sk#90] + +(156) HashAggregate [codegen id : 33] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#91] +Results [1]: [count#92] + +(157) Exchange +Input [1]: [count#92] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=21] + +(158) HashAggregate [codegen id : 34] +Input [1]: [count#92] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#93] +Results [1]: [count(1)#93 AS h11_30_to_12#94] + +(159) BroadcastExchange +Input [1]: [h11_30_to_12#94] +Arguments: IdentityBroadcastMode, [plan_id=22] + +(160) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(161) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(162) ColumnarToRow [codegen id : 38] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] + +(163) Filter [codegen id : 38] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] +Condition : ((isnotnull(ss_hdemo_sk#96) AND isnotnull(ss_sold_time_sk#95)) AND isnotnull(ss_store_sk#97)) + +(164) Project [codegen id : 38] +Output [3]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] + +(165) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#99] + +(166) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_hdemo_sk#96] +Right keys [1]: [hd_demo_sk#99] +Join type: Inner +Join condition: None + +(167) Project [codegen id : 38] +Output [2]: [ss_sold_time_sk#95, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, hd_demo_sk#99] + +(168) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#100, t_hour#101, t_minute#102] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(169) ColumnarToRow [codegen id : 36] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] + +(170) Filter [codegen id : 36] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] +Condition : ((((isnotnull(t_hour#101) AND isnotnull(t_minute#102)) AND (t_hour#101 = 12)) AND (t_minute#102 < 30)) AND isnotnull(t_time_sk#100)) + +(171) Project [codegen id : 36] +Output [1]: [t_time_sk#100] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] + +(172) BroadcastExchange +Input [1]: [t_time_sk#100] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=23] + +(173) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_sold_time_sk#95] +Right keys [1]: [t_time_sk#100] +Join type: Inner +Join condition: None + +(174) Project [codegen id : 38] +Output [1]: [ss_store_sk#97] +Input [3]: [ss_sold_time_sk#95, ss_store_sk#97, t_time_sk#100] + +(175) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#103] + +(176) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_store_sk#97] +Right keys [1]: [s_store_sk#103] +Join type: Inner +Join condition: None + +(177) Project [codegen id : 38] +Output: [] +Input [2]: [ss_store_sk#97, s_store_sk#103] + +(178) HashAggregate [codegen id : 38] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#104] +Results [1]: [count#105] + +(179) Exchange +Input [1]: [count#105] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=24] + +(180) HashAggregate [codegen id : 39] +Input [1]: [count#105] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#106] +Results [1]: [count(1)#106 AS h12_to_12_30#107] + +(181) BroadcastExchange +Input [1]: [h12_to_12_30#107] +Arguments: IdentityBroadcastMode, [plan_id=25] + +(182) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/simplified.txt new file mode 100644 index 0000000000..12778886b5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_datafusion/simplified.txt @@ -0,0 +1,265 @@ +WholeStageCodegen (40) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [count] [count(1),h8_30_to_9,count] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + HashAggregate [count] [count(1),h9_to_9_30,count] + InputAdapter + Exchange #6 + WholeStageCodegen (8) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + HashAggregate [count] [count(1),h9_30_to_10,count] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (11) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (19) + HashAggregate [count] [count(1),h10_to_10_30,count] + InputAdapter + Exchange #12 + WholeStageCodegen (18) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (24) + HashAggregate [count] [count(1),h10_30_to_11,count] + InputAdapter + Exchange #15 + WholeStageCodegen (23) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (21) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (29) + HashAggregate [count] [count(1),h11_to_11_30,count] + InputAdapter + Exchange #18 + WholeStageCodegen (28) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (26) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (34) + HashAggregate [count] [count(1),h11_30_to_12,count] + InputAdapter + Exchange #21 + WholeStageCodegen (33) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #22 + WholeStageCodegen (31) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (39) + HashAggregate [count] [count(1),h12_to_12_30,count] + InputAdapter + Exchange #24 + WholeStageCodegen (38) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #25 + WholeStageCodegen (36) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..be540f124f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/explain.txt @@ -0,0 +1,1031 @@ +== Physical Plan == +* BroadcastNestedLoopJoin Inner BuildRight (182) +:- * BroadcastNestedLoopJoin Inner BuildRight (160) +: :- * BroadcastNestedLoopJoin Inner BuildRight (138) +: : :- * BroadcastNestedLoopJoin Inner BuildRight (116) +: : : :- * BroadcastNestedLoopJoin Inner BuildRight (94) +: : : : :- * BroadcastNestedLoopJoin Inner BuildRight (72) +: : : : : :- * BroadcastNestedLoopJoin Inner BuildRight (50) +: : : : : : :- * HashAggregate (28) +: : : : : : : +- Exchange (27) +: : : : : : : +- * HashAggregate (26) +: : : : : : : +- * Project (25) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (24) +: : : : : : : :- * Project (18) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) +: : : : : : : : :- * Project (11) +: : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (10) +: : : : : : : : : :- * Project (4) +: : : : : : : : : : +- * Filter (3) +: : : : : : : : : : +- * ColumnarToRow (2) +: : : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) +: : : : : : : : : +- BroadcastExchange (9) +: : : : : : : : : +- * Project (8) +: : : : : : : : : +- * Filter (7) +: : : : : : : : : +- * ColumnarToRow (6) +: : : : : : : : : +- Scan parquet spark_catalog.default.household_demographics (5) +: : : : : : : : +- BroadcastExchange (16) +: : : : : : : : +- * Project (15) +: : : : : : : : +- * Filter (14) +: : : : : : : : +- * ColumnarToRow (13) +: : : : : : : : +- Scan parquet spark_catalog.default.time_dim (12) +: : : : : : : +- BroadcastExchange (23) +: : : : : : : +- * Project (22) +: : : : : : : +- * Filter (21) +: : : : : : : +- * ColumnarToRow (20) +: : : : : : : +- Scan parquet spark_catalog.default.store (19) +: : : : : : +- BroadcastExchange (49) +: : : : : : +- * HashAggregate (48) +: : : : : : +- Exchange (47) +: : : : : : +- * HashAggregate (46) +: : : : : : +- * Project (45) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (44) +: : : : : : :- * Project (42) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (41) +: : : : : : : :- * Project (35) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (34) +: : : : : : : : :- * Project (32) +: : : : : : : : : +- * Filter (31) +: : : : : : : : : +- * ColumnarToRow (30) +: : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (29) +: : : : : : : : +- ReusedExchange (33) +: : : : : : : +- BroadcastExchange (40) +: : : : : : : +- * Project (39) +: : : : : : : +- * Filter (38) +: : : : : : : +- * ColumnarToRow (37) +: : : : : : : +- Scan parquet spark_catalog.default.time_dim (36) +: : : : : : +- ReusedExchange (43) +: : : : : +- BroadcastExchange (71) +: : : : : +- * HashAggregate (70) +: : : : : +- Exchange (69) +: : : : : +- * HashAggregate (68) +: : : : : +- * Project (67) +: : : : : +- * BroadcastHashJoin Inner BuildRight (66) +: : : : : :- * Project (64) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (63) +: : : : : : :- * Project (57) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (56) +: : : : : : : :- * Project (54) +: : : : : : : : +- * Filter (53) +: : : : : : : : +- * ColumnarToRow (52) +: : : : : : : : +- Scan parquet spark_catalog.default.store_sales (51) +: : : : : : : +- ReusedExchange (55) +: : : : : : +- BroadcastExchange (62) +: : : : : : +- * Project (61) +: : : : : : +- * Filter (60) +: : : : : : +- * ColumnarToRow (59) +: : : : : : +- Scan parquet spark_catalog.default.time_dim (58) +: : : : : +- ReusedExchange (65) +: : : : +- BroadcastExchange (93) +: : : : +- * HashAggregate (92) +: : : : +- Exchange (91) +: : : : +- * HashAggregate (90) +: : : : +- * Project (89) +: : : : +- * BroadcastHashJoin Inner BuildRight (88) +: : : : :- * Project (86) +: : : : : +- * BroadcastHashJoin Inner BuildRight (85) +: : : : : :- * Project (79) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (78) +: : : : : : :- * Project (76) +: : : : : : : +- * Filter (75) +: : : : : : : +- * ColumnarToRow (74) +: : : : : : : +- Scan parquet spark_catalog.default.store_sales (73) +: : : : : : +- ReusedExchange (77) +: : : : : +- BroadcastExchange (84) +: : : : : +- * Project (83) +: : : : : +- * Filter (82) +: : : : : +- * ColumnarToRow (81) +: : : : : +- Scan parquet spark_catalog.default.time_dim (80) +: : : : +- ReusedExchange (87) +: : : +- BroadcastExchange (115) +: : : +- * HashAggregate (114) +: : : +- Exchange (113) +: : : +- * HashAggregate (112) +: : : +- * Project (111) +: : : +- * BroadcastHashJoin Inner BuildRight (110) +: : : :- * Project (108) +: : : : +- * BroadcastHashJoin Inner BuildRight (107) +: : : : :- * Project (101) +: : : : : +- * BroadcastHashJoin Inner BuildRight (100) +: : : : : :- * Project (98) +: : : : : : +- * Filter (97) +: : : : : : +- * ColumnarToRow (96) +: : : : : : +- Scan parquet spark_catalog.default.store_sales (95) +: : : : : +- ReusedExchange (99) +: : : : +- BroadcastExchange (106) +: : : : +- * Project (105) +: : : : +- * Filter (104) +: : : : +- * ColumnarToRow (103) +: : : : +- Scan parquet spark_catalog.default.time_dim (102) +: : : +- ReusedExchange (109) +: : +- BroadcastExchange (137) +: : +- * HashAggregate (136) +: : +- Exchange (135) +: : +- * HashAggregate (134) +: : +- * Project (133) +: : +- * BroadcastHashJoin Inner BuildRight (132) +: : :- * Project (130) +: : : +- * BroadcastHashJoin Inner BuildRight (129) +: : : :- * Project (123) +: : : : +- * BroadcastHashJoin Inner BuildRight (122) +: : : : :- * Project (120) +: : : : : +- * Filter (119) +: : : : : +- * ColumnarToRow (118) +: : : : : +- Scan parquet spark_catalog.default.store_sales (117) +: : : : +- ReusedExchange (121) +: : : +- BroadcastExchange (128) +: : : +- * Project (127) +: : : +- * Filter (126) +: : : +- * ColumnarToRow (125) +: : : +- Scan parquet spark_catalog.default.time_dim (124) +: : +- ReusedExchange (131) +: +- BroadcastExchange (159) +: +- * HashAggregate (158) +: +- Exchange (157) +: +- * HashAggregate (156) +: +- * Project (155) +: +- * BroadcastHashJoin Inner BuildRight (154) +: :- * Project (152) +: : +- * BroadcastHashJoin Inner BuildRight (151) +: : :- * Project (145) +: : : +- * BroadcastHashJoin Inner BuildRight (144) +: : : :- * Project (142) +: : : : +- * Filter (141) +: : : : +- * ColumnarToRow (140) +: : : : +- Scan parquet spark_catalog.default.store_sales (139) +: : : +- ReusedExchange (143) +: : +- BroadcastExchange (150) +: : +- * Project (149) +: : +- * Filter (148) +: : +- * ColumnarToRow (147) +: : +- Scan parquet spark_catalog.default.time_dim (146) +: +- ReusedExchange (153) ++- BroadcastExchange (181) + +- * HashAggregate (180) + +- Exchange (179) + +- * HashAggregate (178) + +- * Project (177) + +- * BroadcastHashJoin Inner BuildRight (176) + :- * Project (174) + : +- * BroadcastHashJoin Inner BuildRight (173) + : :- * Project (167) + : : +- * BroadcastHashJoin Inner BuildRight (166) + : : :- * Project (164) + : : : +- * Filter (163) + : : : +- * ColumnarToRow (162) + : : : +- Scan parquet spark_catalog.default.store_sales (161) + : : +- ReusedExchange (165) + : +- BroadcastExchange (172) + : +- * Project (171) + : +- * Filter (170) + : +- * ColumnarToRow (169) + : +- Scan parquet spark_catalog.default.time_dim (168) + +- ReusedExchange (175) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Project [codegen id : 4] +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.household_demographics +Output [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] + +(7) Filter [codegen id : 1] +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] +Condition : (((((hd_dep_count#6 = 4) AND (hd_vehicle_count#7 <= 6)) OR ((hd_dep_count#6 = 2) AND (hd_vehicle_count#7 <= 4))) OR ((hd_dep_count#6 = 0) AND (hd_vehicle_count#7 <= 2))) AND isnotnull(hd_demo_sk#5)) + +(8) Project [codegen id : 1] +Output [1]: [hd_demo_sk#5] +Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] + +(9) BroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#5] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] + +(12) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(14) Filter [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(15) Project [codegen id : 2] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(16) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(19) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] + +(21) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(22) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_store_name#12] + +(23) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(26) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] + +(27) Exchange +Input [1]: [count#14] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 40] +Input [1]: [count#14] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#15] +Results [1]: [count(1)#15 AS h8_30_to_9#16] + +(29) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] + +(31) Filter [codegen id : 8] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_hdemo_sk#18) AND isnotnull(ss_sold_time_sk#17)) AND isnotnull(ss_store_sk#19)) + +(32) Project [codegen id : 8] +Output [3]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] + +(33) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#21] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_hdemo_sk#18] +Right keys [1]: [hd_demo_sk#21] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 8] +Output [2]: [ss_sold_time_sk#17, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, hd_demo_sk#21] + +(36) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#22, t_hour#23, t_minute#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] + +(38) Filter [codegen id : 6] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] +Condition : ((((isnotnull(t_hour#23) AND isnotnull(t_minute#24)) AND (t_hour#23 = 9)) AND (t_minute#24 < 30)) AND isnotnull(t_time_sk#22)) + +(39) Project [codegen id : 6] +Output [1]: [t_time_sk#22] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] + +(40) BroadcastExchange +Input [1]: [t_time_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_time_sk#17] +Right keys [1]: [t_time_sk#22] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [1]: [ss_store_sk#19] +Input [3]: [ss_sold_time_sk#17, ss_store_sk#19, t_time_sk#22] + +(43) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#25] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#19] +Right keys [1]: [s_store_sk#25] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 8] +Output: [] +Input [2]: [ss_store_sk#19, s_store_sk#25] + +(46) HashAggregate [codegen id : 8] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#26] +Results [1]: [count#27] + +(47) Exchange +Input [1]: [count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(48) HashAggregate [codegen id : 9] +Input [1]: [count#27] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#28] +Results [1]: [count(1)#28 AS h9_to_9_30#29] + +(49) BroadcastExchange +Input [1]: [h9_to_9_30#29] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(50) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(51) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 13] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] + +(53) Filter [codegen id : 13] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] +Condition : ((isnotnull(ss_hdemo_sk#31) AND isnotnull(ss_sold_time_sk#30)) AND isnotnull(ss_store_sk#32)) + +(54) Project [codegen id : 13] +Output [3]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] + +(55) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#34] + +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_hdemo_sk#31] +Right keys [1]: [hd_demo_sk#34] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 13] +Output [2]: [ss_sold_time_sk#30, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, hd_demo_sk#34] + +(58) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#35, t_hour#36, t_minute#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 11] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] + +(60) Filter [codegen id : 11] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] +Condition : ((((isnotnull(t_hour#36) AND isnotnull(t_minute#37)) AND (t_hour#36 = 9)) AND (t_minute#37 >= 30)) AND isnotnull(t_time_sk#35)) + +(61) Project [codegen id : 11] +Output [1]: [t_time_sk#35] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] + +(62) BroadcastExchange +Input [1]: [t_time_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(63) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_time_sk#30] +Right keys [1]: [t_time_sk#35] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 13] +Output [1]: [ss_store_sk#32] +Input [3]: [ss_sold_time_sk#30, ss_store_sk#32, t_time_sk#35] + +(65) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#38] + +(66) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_store_sk#32] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 13] +Output: [] +Input [2]: [ss_store_sk#32, s_store_sk#38] + +(68) HashAggregate [codegen id : 13] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#39] +Results [1]: [count#40] + +(69) Exchange +Input [1]: [count#40] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] + +(70) HashAggregate [codegen id : 14] +Input [1]: [count#40] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS h9_30_to_10#42] + +(71) BroadcastExchange +Input [1]: [h9_30_to_10#42] +Arguments: IdentityBroadcastMode, [plan_id=10] + +(72) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(73) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 18] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] + +(75) Filter [codegen id : 18] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] +Condition : ((isnotnull(ss_hdemo_sk#44) AND isnotnull(ss_sold_time_sk#43)) AND isnotnull(ss_store_sk#45)) + +(76) Project [codegen id : 18] +Output [3]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] + +(77) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#47] + +(78) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_hdemo_sk#44] +Right keys [1]: [hd_demo_sk#47] +Join type: Inner +Join condition: None + +(79) Project [codegen id : 18] +Output [2]: [ss_sold_time_sk#43, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, hd_demo_sk#47] + +(80) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#48, t_hour#49, t_minute#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 16] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] + +(82) Filter [codegen id : 16] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] +Condition : ((((isnotnull(t_hour#49) AND isnotnull(t_minute#50)) AND (t_hour#49 = 10)) AND (t_minute#50 < 30)) AND isnotnull(t_time_sk#48)) + +(83) Project [codegen id : 16] +Output [1]: [t_time_sk#48] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] + +(84) BroadcastExchange +Input [1]: [t_time_sk#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] + +(85) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_sold_time_sk#43] +Right keys [1]: [t_time_sk#48] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 18] +Output [1]: [ss_store_sk#45] +Input [3]: [ss_sold_time_sk#43, ss_store_sk#45, t_time_sk#48] + +(87) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#51] + +(88) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ss_store_sk#45] +Right keys [1]: [s_store_sk#51] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 18] +Output: [] +Input [2]: [ss_store_sk#45, s_store_sk#51] + +(90) HashAggregate [codegen id : 18] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#52] +Results [1]: [count#53] + +(91) Exchange +Input [1]: [count#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(92) HashAggregate [codegen id : 19] +Input [1]: [count#53] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#54] +Results [1]: [count(1)#54 AS h10_to_10_30#55] + +(93) BroadcastExchange +Input [1]: [h10_to_10_30#55] +Arguments: IdentityBroadcastMode, [plan_id=13] + +(94) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(95) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 23] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] + +(97) Filter [codegen id : 23] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] +Condition : ((isnotnull(ss_hdemo_sk#57) AND isnotnull(ss_sold_time_sk#56)) AND isnotnull(ss_store_sk#58)) + +(98) Project [codegen id : 23] +Output [3]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] + +(99) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#60] + +(100) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ss_hdemo_sk#57] +Right keys [1]: [hd_demo_sk#60] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 23] +Output [2]: [ss_sold_time_sk#56, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, hd_demo_sk#60] + +(102) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#61, t_hour#62, t_minute#63] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 21] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] + +(104) Filter [codegen id : 21] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] +Condition : ((((isnotnull(t_hour#62) AND isnotnull(t_minute#63)) AND (t_hour#62 = 10)) AND (t_minute#63 >= 30)) AND isnotnull(t_time_sk#61)) + +(105) Project [codegen id : 21] +Output [1]: [t_time_sk#61] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] + +(106) BroadcastExchange +Input [1]: [t_time_sk#61] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] + +(107) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ss_sold_time_sk#56] +Right keys [1]: [t_time_sk#61] +Join type: Inner +Join condition: None + +(108) Project [codegen id : 23] +Output [1]: [ss_store_sk#58] +Input [3]: [ss_sold_time_sk#56, ss_store_sk#58, t_time_sk#61] + +(109) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#64] + +(110) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [ss_store_sk#58] +Right keys [1]: [s_store_sk#64] +Join type: Inner +Join condition: None + +(111) Project [codegen id : 23] +Output: [] +Input [2]: [ss_store_sk#58, s_store_sk#64] + +(112) HashAggregate [codegen id : 23] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#65] +Results [1]: [count#66] + +(113) Exchange +Input [1]: [count#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(114) HashAggregate [codegen id : 24] +Input [1]: [count#66] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#67] +Results [1]: [count(1)#67 AS h10_30_to_11#68] + +(115) BroadcastExchange +Input [1]: [h10_30_to_11#68] +Arguments: IdentityBroadcastMode, [plan_id=16] + +(116) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(117) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 28] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] + +(119) Filter [codegen id : 28] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Condition : ((isnotnull(ss_hdemo_sk#70) AND isnotnull(ss_sold_time_sk#69)) AND isnotnull(ss_store_sk#71)) + +(120) Project [codegen id : 28] +Output [3]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] + +(121) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#73] + +(122) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ss_hdemo_sk#70] +Right keys [1]: [hd_demo_sk#73] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 28] +Output [2]: [ss_sold_time_sk#69, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, hd_demo_sk#73] + +(124) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 26] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] + +(126) Filter [codegen id : 26] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Condition : ((((isnotnull(t_hour#75) AND isnotnull(t_minute#76)) AND (t_hour#75 = 11)) AND (t_minute#76 < 30)) AND isnotnull(t_time_sk#74)) + +(127) Project [codegen id : 26] +Output [1]: [t_time_sk#74] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] + +(128) BroadcastExchange +Input [1]: [t_time_sk#74] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=17] + +(129) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ss_sold_time_sk#69] +Right keys [1]: [t_time_sk#74] +Join type: Inner +Join condition: None + +(130) Project [codegen id : 28] +Output [1]: [ss_store_sk#71] +Input [3]: [ss_sold_time_sk#69, ss_store_sk#71, t_time_sk#74] + +(131) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#77] + +(132) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [ss_store_sk#71] +Right keys [1]: [s_store_sk#77] +Join type: Inner +Join condition: None + +(133) Project [codegen id : 28] +Output: [] +Input [2]: [ss_store_sk#71, s_store_sk#77] + +(134) HashAggregate [codegen id : 28] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] + +(135) Exchange +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] + +(136) HashAggregate [codegen id : 29] +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#80] +Results [1]: [count(1)#80 AS h11_to_11_30#81] + +(137) BroadcastExchange +Input [1]: [h11_to_11_30#81] +Arguments: IdentityBroadcastMode, [plan_id=19] + +(138) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(139) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(140) ColumnarToRow [codegen id : 33] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] + +(141) Filter [codegen id : 33] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] +Condition : ((isnotnull(ss_hdemo_sk#83) AND isnotnull(ss_sold_time_sk#82)) AND isnotnull(ss_store_sk#84)) + +(142) Project [codegen id : 33] +Output [3]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] + +(143) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#86] + +(144) BroadcastHashJoin [codegen id : 33] +Left keys [1]: [ss_hdemo_sk#83] +Right keys [1]: [hd_demo_sk#86] +Join type: Inner +Join condition: None + +(145) Project [codegen id : 33] +Output [2]: [ss_sold_time_sk#82, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, hd_demo_sk#86] + +(146) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#87, t_hour#88, t_minute#89] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(147) ColumnarToRow [codegen id : 31] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] + +(148) Filter [codegen id : 31] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] +Condition : ((((isnotnull(t_hour#88) AND isnotnull(t_minute#89)) AND (t_hour#88 = 11)) AND (t_minute#89 >= 30)) AND isnotnull(t_time_sk#87)) + +(149) Project [codegen id : 31] +Output [1]: [t_time_sk#87] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] + +(150) BroadcastExchange +Input [1]: [t_time_sk#87] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] + +(151) BroadcastHashJoin [codegen id : 33] +Left keys [1]: [ss_sold_time_sk#82] +Right keys [1]: [t_time_sk#87] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 33] +Output [1]: [ss_store_sk#84] +Input [3]: [ss_sold_time_sk#82, ss_store_sk#84, t_time_sk#87] + +(153) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#90] + +(154) BroadcastHashJoin [codegen id : 33] +Left keys [1]: [ss_store_sk#84] +Right keys [1]: [s_store_sk#90] +Join type: Inner +Join condition: None + +(155) Project [codegen id : 33] +Output: [] +Input [2]: [ss_store_sk#84, s_store_sk#90] + +(156) HashAggregate [codegen id : 33] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#91] +Results [1]: [count#92] + +(157) Exchange +Input [1]: [count#92] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=21] + +(158) HashAggregate [codegen id : 34] +Input [1]: [count#92] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#93] +Results [1]: [count(1)#93 AS h11_30_to_12#94] + +(159) BroadcastExchange +Input [1]: [h11_30_to_12#94] +Arguments: IdentityBroadcastMode, [plan_id=22] + +(160) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + +(161) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(162) ColumnarToRow [codegen id : 38] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] + +(163) Filter [codegen id : 38] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] +Condition : ((isnotnull(ss_hdemo_sk#96) AND isnotnull(ss_sold_time_sk#95)) AND isnotnull(ss_store_sk#97)) + +(164) Project [codegen id : 38] +Output [3]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] + +(165) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#99] + +(166) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_hdemo_sk#96] +Right keys [1]: [hd_demo_sk#99] +Join type: Inner +Join condition: None + +(167) Project [codegen id : 38] +Output [2]: [ss_sold_time_sk#95, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, hd_demo_sk#99] + +(168) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#100, t_hour#101, t_minute#102] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(169) ColumnarToRow [codegen id : 36] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] + +(170) Filter [codegen id : 36] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] +Condition : ((((isnotnull(t_hour#101) AND isnotnull(t_minute#102)) AND (t_hour#101 = 12)) AND (t_minute#102 < 30)) AND isnotnull(t_time_sk#100)) + +(171) Project [codegen id : 36] +Output [1]: [t_time_sk#100] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] + +(172) BroadcastExchange +Input [1]: [t_time_sk#100] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=23] + +(173) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_sold_time_sk#95] +Right keys [1]: [t_time_sk#100] +Join type: Inner +Join condition: None + +(174) Project [codegen id : 38] +Output [1]: [ss_store_sk#97] +Input [3]: [ss_sold_time_sk#95, ss_store_sk#97, t_time_sk#100] + +(175) ReusedExchange [Reuses operator id: 23] +Output [1]: [s_store_sk#103] + +(176) BroadcastHashJoin [codegen id : 38] +Left keys [1]: [ss_store_sk#97] +Right keys [1]: [s_store_sk#103] +Join type: Inner +Join condition: None + +(177) Project [codegen id : 38] +Output: [] +Input [2]: [ss_store_sk#97, s_store_sk#103] + +(178) HashAggregate [codegen id : 38] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#104] +Results [1]: [count#105] + +(179) Exchange +Input [1]: [count#105] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=24] + +(180) HashAggregate [codegen id : 39] +Input [1]: [count#105] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#106] +Results [1]: [count(1)#106 AS h12_to_12_30#107] + +(181) BroadcastExchange +Input [1]: [h12_to_12_30#107] +Arguments: IdentityBroadcastMode, [plan_id=25] + +(182) BroadcastNestedLoopJoin [codegen id : 40] +Join type: Inner +Join condition: None + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..12778886b5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.native_iceberg_compat/simplified.txt @@ -0,0 +1,265 @@ +WholeStageCodegen (40) + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + HashAggregate [count] [count(1),h8_30_to_9,count] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + HashAggregate [count] [count(1),h9_to_9_30,count] + InputAdapter + Exchange #6 + WholeStageCodegen (8) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (14) + HashAggregate [count] [count(1),h9_30_to_10,count] + InputAdapter + Exchange #9 + WholeStageCodegen (13) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (11) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (19) + HashAggregate [count] [count(1),h10_to_10_30,count] + InputAdapter + Exchange #12 + WholeStageCodegen (18) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (24) + HashAggregate [count] [count(1),h10_30_to_11,count] + InputAdapter + Exchange #15 + WholeStageCodegen (23) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (21) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (29) + HashAggregate [count] [count(1),h11_to_11_30,count] + InputAdapter + Exchange #18 + WholeStageCodegen (28) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (26) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #20 + WholeStageCodegen (34) + HashAggregate [count] [count(1),h11_30_to_12,count] + InputAdapter + Exchange #21 + WholeStageCodegen (33) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #22 + WholeStageCodegen (31) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 + InputAdapter + BroadcastExchange #23 + WholeStageCodegen (39) + HashAggregate [count] [count(1),h12_to_12_30,count] + InputAdapter + Exchange #24 + WholeStageCodegen (38) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #25 + WholeStageCodegen (36) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/explain.txt new file mode 100644 index 0000000000..f984b6c76b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/explain.txt @@ -0,0 +1,179 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.store_sales (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.date_dim (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.store (17) + + +(1) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(In(i_category, [Books ,Electronics ,Sports ]),In(i_class, [computers ,football ,stereo ])),And(In(i_category, [Jewelry ,Men ,Women ]),In(i_class, [birdal ,dresses ,shirts ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(3) Filter [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books ,Electronics ,Sports ) AND i_class#3 IN (computers ,stereo ,football )) OR (i_category#4 IN (Men ,Jewelry ,Women ) AND i_class#3 IN (shirts ,birdal ,dresses ))) AND isnotnull(i_item_sk#1)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(6) Filter [codegen id : 1] +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) + +(7) BroadcastExchange +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((isnotnull(d_year#10) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(13) Project [codegen id : 2] +Output [2]: [d_date_sk#9, d_moy#11] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(14) BroadcastExchange +Input [2]: [d_date_sk#9, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#9, d_moy#11] + +(17) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(19) Filter [codegen id : 3] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) + +(20) BroadcastExchange +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] + +(24) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#17] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS _w0#19] + +(26) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] + +(29) Filter [codegen id : 7] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN ((abs((sum_sales#18 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END + +(30) Project [codegen id : 7] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] + +(31) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [(sum_sales#18 - avg_monthly_sales#20) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8ea1fd5d3d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] + WholeStageCodegen (7) + Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f984b6c76b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/explain.txt @@ -0,0 +1,179 @@ +== Physical Plan == +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.item (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.store_sales (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet spark_catalog.default.date_dim (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.store (17) + + +(1) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(In(i_category, [Books ,Electronics ,Sports ]),In(i_class, [computers ,football ,stereo ])),And(In(i_category, [Jewelry ,Men ,Women ]),In(i_class, [birdal ,dresses ,shirts ]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(3) Filter [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books ,Electronics ,Sports ) AND i_class#3 IN (computers ,stereo ,football )) OR (i_category#4 IN (Men ,Jewelry ,Women ) AND i_class#3 IN (shirts ,birdal ,dresses ))) AND isnotnull(i_item_sk#1)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(6) Filter [codegen id : 1] +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) + +(7) BroadcastExchange +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((isnotnull(d_year#10) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) + +(13) Project [codegen id : 2] +Output [2]: [d_date_sk#9, d_moy#11] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(14) BroadcastExchange +Input [2]: [d_date_sk#9, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#9, d_moy#11] + +(17) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] + +(19) Filter [codegen id : 3] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) + +(20) BroadcastExchange +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] + +(24) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#17] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS _w0#19] + +(26) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] + +(29) Filter [codegen id : 7] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN ((abs((sum_sales#18 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END + +(30) Project [codegen id : 7] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] + +(31) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [(sum_sales#18 - avg_monthly_sales#20) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8ea1fd5d3d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] + WholeStageCodegen (7) + Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk,d_moy] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/explain.txt new file mode 100644 index 0000000000..e32db67408 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/explain.txt @@ -0,0 +1,303 @@ +== Physical Plan == +* Project (4) ++- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet spark_catalog.default.reason (1) + + +(1) Scan parquet spark_catalog.default.reason +Output [1]: [r_reason_sk#1] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [1]: [r_reason_sk#1] + +(3) Filter [codegen id : 1] +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, CASE WHEN (Subquery scalar-subquery#8, [id=#9].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_net_paid) END AS bucket3#10, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket4#13, CASE WHEN (Subquery scalar-subquery#14, [id=#15].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_net_paid) END AS bucket5#16] +Input [1]: [r_reason_sk#1] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] +* Project (12) ++- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet spark_catalog.default.store_sales (5) + + +(5) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] + +(7) Filter [codegen id : 1] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) + +(8) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] + +(9) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [5]: [count#21, sum#22, count#23, sum#24, count#25] +Results [5]: [count#26, sum#27, count#28, sum#29, count#30] + +(10) Exchange +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] + +(11) HashAggregate [codegen id : 2] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [3]: [count(1)#31, avg(UnscaledValue(ss_ext_discount_amt#18))#32, avg(UnscaledValue(ss_net_paid#19))#33] +Results [3]: [count(1)#31 AS count(1)#34, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#32 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#35, cast((avg(UnscaledValue(ss_net_paid#19))#33 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#36] + +(12) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#34, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#35, avg(ss_net_paid), avg(ss_net_paid)#36) AS mergedValue#37] +Input [3]: [count(1)#34, avg(ss_ext_discount_amt)#35, avg(ss_net_paid)#36] + +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] + +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] + +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] +* Project (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet spark_catalog.default.store_sales (13) + + +(13) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] + +(15) Filter [codegen id : 1] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Condition : ((isnotnull(ss_quantity#38) AND (ss_quantity#38 >= 21)) AND (ss_quantity#38 <= 40)) + +(16) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] + +(17) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#39)), partial_avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [5]: [count#42, sum#43, count#44, sum#45, count#46] +Results [5]: [count#47, sum#48, count#49, sum#50, count#51] + +(18) Exchange +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(19) HashAggregate [codegen id : 2] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#39)), avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [3]: [count(1)#52, avg(UnscaledValue(ss_ext_discount_amt#39))#53, avg(UnscaledValue(ss_net_paid#40))#54] +Results [3]: [count(1)#52 AS count(1)#55, cast((avg(UnscaledValue(ss_ext_discount_amt#39))#53 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#56, cast((avg(UnscaledValue(ss_net_paid#40))#54 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#57] + +(20) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#55, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#56, avg(ss_net_paid), avg(ss_net_paid)#57) AS mergedValue#58] +Input [3]: [count(1)#55, avg(ss_ext_discount_amt)#56, avg(ss_net_paid)#57] + +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] + +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] + +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] +* Project (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet spark_catalog.default.store_sales (21) + + +(21) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] + +(23) Filter [codegen id : 1] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_quantity#59) AND (ss_quantity#59 >= 41)) AND (ss_quantity#59 <= 60)) + +(24) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] + +(25) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#60)), partial_avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [5]: [count#63, sum#64, count#65, sum#66, count#67] +Results [5]: [count#68, sum#69, count#70, sum#71, count#72] + +(26) Exchange +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(27) HashAggregate [codegen id : 2] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#60)), avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [3]: [count(1)#73, avg(UnscaledValue(ss_ext_discount_amt#60))#74, avg(UnscaledValue(ss_net_paid#61))#75] +Results [3]: [count(1)#73 AS count(1)#76, cast((avg(UnscaledValue(ss_ext_discount_amt#60))#74 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#77, cast((avg(UnscaledValue(ss_net_paid#61))#75 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#78] + +(28) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#76, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#77, avg(ss_net_paid), avg(ss_net_paid)#78) AS mergedValue#79] +Input [3]: [count(1)#76, avg(ss_ext_discount_amt)#77, avg(ss_net_paid)#78] + +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] + +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] + +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* Project (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.store_sales (29) + + +(29) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] + +(31) Filter [codegen id : 1] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Condition : ((isnotnull(ss_quantity#80) AND (ss_quantity#80 >= 61)) AND (ss_quantity#80 <= 80)) + +(32) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] + +(33) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#81)), partial_avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [5]: [count#84, sum#85, count#86, sum#87, count#88] +Results [5]: [count#89, sum#90, count#91, sum#92, count#93] + +(34) Exchange +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(35) HashAggregate [codegen id : 2] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#81)), avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [3]: [count(1)#94, avg(UnscaledValue(ss_ext_discount_amt#81))#95, avg(UnscaledValue(ss_net_paid#82))#96] +Results [3]: [count(1)#94 AS count(1)#97, cast((avg(UnscaledValue(ss_ext_discount_amt#81))#95 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#98, cast((avg(UnscaledValue(ss_net_paid#82))#96 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#99] + +(36) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#97, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#98, avg(ss_net_paid), avg(ss_net_paid)#99) AS mergedValue#100] +Input [3]: [count(1)#97, avg(ss_ext_discount_amt)#98, avg(ss_net_paid)#99] + +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] +* Project (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.store_sales (37) + + +(37) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] + +(39) Filter [codegen id : 1] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Condition : ((isnotnull(ss_quantity#101) AND (ss_quantity#101 >= 81)) AND (ss_quantity#101 <= 100)) + +(40) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] + +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#102)), partial_avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [5]: [count#105, sum#106, count#107, sum#108, count#109] +Results [5]: [count#110, sum#111, count#112, sum#113, count#114] + +(42) Exchange +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(43) HashAggregate [codegen id : 2] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#102)), avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [3]: [count(1)#115, avg(UnscaledValue(ss_ext_discount_amt#102))#116, avg(UnscaledValue(ss_net_paid#103))#117] +Results [3]: [count(1)#115 AS count(1)#118, cast((avg(UnscaledValue(ss_ext_discount_amt#102))#116 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#119, cast((avg(UnscaledValue(ss_net_paid#103))#117 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#120] + +(44) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#118, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#119, avg(ss_net_paid), avg(ss_net_paid)#120) AS mergedValue#121] +Input [3]: [count(1)#118, avg(ss_ext_discount_amt)#119, avg(ss_net_paid)#120] + +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] + +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/simplified.txt new file mode 100644 index 0000000000..817fb0007f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_datafusion/simplified.txt @@ -0,0 +1,81 @@ +WholeStageCodegen (1) + Project + Subquery #1 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.reason [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e32db67408 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/explain.txt @@ -0,0 +1,303 @@ +== Physical Plan == +* Project (4) ++- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet spark_catalog.default.reason (1) + + +(1) Scan parquet spark_catalog.default.reason +Output [1]: [r_reason_sk#1] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [1]: [r_reason_sk#1] + +(3) Filter [codegen id : 1] +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, CASE WHEN (Subquery scalar-subquery#8, [id=#9].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_net_paid) END AS bucket3#10, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket4#13, CASE WHEN (Subquery scalar-subquery#14, [id=#15].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_net_paid) END AS bucket5#16] +Input [1]: [r_reason_sk#1] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] +* Project (12) ++- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet spark_catalog.default.store_sales (5) + + +(5) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] + +(7) Filter [codegen id : 1] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) + +(8) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] + +(9) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [5]: [count#21, sum#22, count#23, sum#24, count#25] +Results [5]: [count#26, sum#27, count#28, sum#29, count#30] + +(10) Exchange +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] + +(11) HashAggregate [codegen id : 2] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [3]: [count(1)#31, avg(UnscaledValue(ss_ext_discount_amt#18))#32, avg(UnscaledValue(ss_net_paid#19))#33] +Results [3]: [count(1)#31 AS count(1)#34, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#32 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#35, cast((avg(UnscaledValue(ss_net_paid#19))#33 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#36] + +(12) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#34, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#35, avg(ss_net_paid), avg(ss_net_paid)#36) AS mergedValue#37] +Input [3]: [count(1)#34, avg(ss_ext_discount_amt)#35, avg(ss_net_paid)#36] + +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] + +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] + +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] +* Project (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet spark_catalog.default.store_sales (13) + + +(13) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] + +(15) Filter [codegen id : 1] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Condition : ((isnotnull(ss_quantity#38) AND (ss_quantity#38 >= 21)) AND (ss_quantity#38 <= 40)) + +(16) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] + +(17) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#39)), partial_avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [5]: [count#42, sum#43, count#44, sum#45, count#46] +Results [5]: [count#47, sum#48, count#49, sum#50, count#51] + +(18) Exchange +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] + +(19) HashAggregate [codegen id : 2] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#39)), avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [3]: [count(1)#52, avg(UnscaledValue(ss_ext_discount_amt#39))#53, avg(UnscaledValue(ss_net_paid#40))#54] +Results [3]: [count(1)#52 AS count(1)#55, cast((avg(UnscaledValue(ss_ext_discount_amt#39))#53 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#56, cast((avg(UnscaledValue(ss_net_paid#40))#54 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#57] + +(20) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#55, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#56, avg(ss_net_paid), avg(ss_net_paid)#57) AS mergedValue#58] +Input [3]: [count(1)#55, avg(ss_ext_discount_amt)#56, avg(ss_net_paid)#57] + +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] + +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] + +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] +* Project (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet spark_catalog.default.store_sales (21) + + +(21) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] + +(23) Filter [codegen id : 1] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_quantity#59) AND (ss_quantity#59 >= 41)) AND (ss_quantity#59 <= 60)) + +(24) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] + +(25) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#60)), partial_avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [5]: [count#63, sum#64, count#65, sum#66, count#67] +Results [5]: [count#68, sum#69, count#70, sum#71, count#72] + +(26) Exchange +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] + +(27) HashAggregate [codegen id : 2] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#60)), avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [3]: [count(1)#73, avg(UnscaledValue(ss_ext_discount_amt#60))#74, avg(UnscaledValue(ss_net_paid#61))#75] +Results [3]: [count(1)#73 AS count(1)#76, cast((avg(UnscaledValue(ss_ext_discount_amt#60))#74 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#77, cast((avg(UnscaledValue(ss_net_paid#61))#75 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#78] + +(28) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#76, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#77, avg(ss_net_paid), avg(ss_net_paid)#78) AS mergedValue#79] +Input [3]: [count(1)#76, avg(ss_ext_discount_amt)#77, avg(ss_net_paid)#78] + +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] + +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] + +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* Project (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.store_sales (29) + + +(29) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] + +(31) Filter [codegen id : 1] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Condition : ((isnotnull(ss_quantity#80) AND (ss_quantity#80 >= 61)) AND (ss_quantity#80 <= 80)) + +(32) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] + +(33) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#81)), partial_avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [5]: [count#84, sum#85, count#86, sum#87, count#88] +Results [5]: [count#89, sum#90, count#91, sum#92, count#93] + +(34) Exchange +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(35) HashAggregate [codegen id : 2] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#81)), avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [3]: [count(1)#94, avg(UnscaledValue(ss_ext_discount_amt#81))#95, avg(UnscaledValue(ss_net_paid#82))#96] +Results [3]: [count(1)#94 AS count(1)#97, cast((avg(UnscaledValue(ss_ext_discount_amt#81))#95 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#98, cast((avg(UnscaledValue(ss_net_paid#82))#96 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#99] + +(36) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#97, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#98, avg(ss_net_paid), avg(ss_net_paid)#99) AS mergedValue#100] +Input [3]: [count(1)#97, avg(ss_ext_discount_amt)#98, avg(ss_net_paid)#99] + +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] + +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] +* Project (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.store_sales (37) + + +(37) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] + +(39) Filter [codegen id : 1] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Condition : ((isnotnull(ss_quantity#101) AND (ss_quantity#101 >= 81)) AND (ss_quantity#101 <= 100)) + +(40) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] + +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Keys: [] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#102)), partial_avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [5]: [count#105, sum#106, count#107, sum#108, count#109] +Results [5]: [count#110, sum#111, count#112, sum#113, count#114] + +(42) Exchange +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(43) HashAggregate [codegen id : 2] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Keys: [] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#102)), avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [3]: [count(1)#115, avg(UnscaledValue(ss_ext_discount_amt#102))#116, avg(UnscaledValue(ss_net_paid#103))#117] +Results [3]: [count(1)#115 AS count(1)#118, cast((avg(UnscaledValue(ss_ext_discount_amt#102))#116 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#119, cast((avg(UnscaledValue(ss_net_paid#103))#117 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#120] + +(44) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#118, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#119, avg(ss_net_paid), avg(ss_net_paid)#120) AS mergedValue#121] +Input [3]: [count(1)#118, avg(ss_ext_discount_amt)#119, avg(ss_net_paid)#120] + +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] + +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..817fb0007f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.native_iceberg_compat/simplified.txt @@ -0,0 +1,81 @@ +WholeStageCodegen (1) + Project + Subquery #1 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.reason [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/explain.txt new file mode 100644 index 0000000000..7f295e73ec --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/explain.txt @@ -0,0 +1,292 @@ +== Physical Plan == +* Project (51) ++- * BroadcastNestedLoopJoin Inner BuildRight (50) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * Project (11) + : : : +- * BroadcastHashJoin Inner BuildRight (10) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- BroadcastExchange (9) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet spark_catalog.default.household_demographics (5) + : : +- BroadcastExchange (16) + : : +- * Project (15) + : : +- * Filter (14) + : : +- * ColumnarToRow (13) + : : +- Scan parquet spark_catalog.default.time_dim (12) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet spark_catalog.default.web_page (19) + +- BroadcastExchange (49) + +- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.web_sales (29) + : : +- ReusedExchange (33) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet spark_catalog.default.time_dim (36) + +- ReusedExchange (43) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(4) Project [codegen id : 4] +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(7) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 6)) AND isnotnull(hd_demo_sk#5)) + +(8) Project [codegen id : 1] +Output [1]: [hd_demo_sk#5] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(9) BroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#5] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 4] +Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#5] + +(12) Scan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] + +(14) Filter [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(15) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] + +(16) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 4] +Output [1]: [ws_web_page_sk#3] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] + +(19) Scan parquet spark_catalog.default.web_page +Output [2]: [wp_web_page_sk#9, wp_char_count#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] + +(21) Filter [codegen id : 3] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Condition : (((isnotnull(wp_char_count#10) AND (wp_char_count#10 >= 5000)) AND (wp_char_count#10 <= 5200)) AND isnotnull(wp_web_page_sk#9)) + +(22) Project [codegen id : 3] +Output [1]: [wp_web_page_sk#9] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] + +(23) BroadcastExchange +Input [1]: [wp_web_page_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#9] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output: [] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#9] + +(26) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#11] +Results [1]: [count#12] + +(27) Exchange +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 10] +Input [1]: [count#12] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#13] +Results [1]: [count(1)#13 AS amc#14] + +(29) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] + +(31) Filter [codegen id : 8] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] +Condition : ((isnotnull(ws_ship_hdemo_sk#16) AND isnotnull(ws_sold_time_sk#15)) AND isnotnull(ws_web_page_sk#17)) + +(32) Project [codegen id : 8] +Output [3]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] + +(33) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#19] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_ship_hdemo_sk#16] +Right keys [1]: [hd_demo_sk#19] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 8] +Output [2]: [ws_sold_time_sk#15, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, hd_demo_sk#19] + +(36) Scan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#20, t_hour#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [t_time_sk#20, t_hour#21] + +(38) Filter [codegen id : 6] +Input [2]: [t_time_sk#20, t_hour#21] +Condition : (((isnotnull(t_hour#21) AND (t_hour#21 >= 19)) AND (t_hour#21 <= 20)) AND isnotnull(t_time_sk#20)) + +(39) Project [codegen id : 6] +Output [1]: [t_time_sk#20] +Input [2]: [t_time_sk#20, t_hour#21] + +(40) BroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_time_sk#15] +Right keys [1]: [t_time_sk#20] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [1]: [ws_web_page_sk#17] +Input [3]: [ws_sold_time_sk#15, ws_web_page_sk#17, t_time_sk#20] + +(43) ReusedExchange [Reuses operator id: 23] +Output [1]: [wp_web_page_sk#22] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_web_page_sk#17] +Right keys [1]: [wp_web_page_sk#22] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 8] +Output: [] +Input [2]: [ws_web_page_sk#17, wp_web_page_sk#22] + +(46) HashAggregate [codegen id : 8] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(47) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(48) HashAggregate [codegen id : 9] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS pmc#26] + +(49) BroadcastExchange +Input [1]: [pmc#26] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(50) BroadcastNestedLoopJoin [codegen id : 10] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 10] +Output [1]: [(cast(amc#14 as decimal(15,4)) / cast(pmc#26 as decimal(15,4))) AS am_pm_ratio#27] +Input [2]: [amc#14, pmc#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9cf499b546 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_datafusion/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen (10) + Project [amc,pmc] + BroadcastNestedLoopJoin + HashAggregate [count] [count(1),amc,count] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + HashAggregate [count] [count(1),pmc,count] + InputAdapter + Exchange #6 + WholeStageCodegen (8) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour] + InputAdapter + ReusedExchange [wp_web_page_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..7f295e73ec --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/explain.txt @@ -0,0 +1,292 @@ +== Physical Plan == +* Project (51) ++- * BroadcastNestedLoopJoin Inner BuildRight (50) + :- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * Project (11) + : : : +- * BroadcastHashJoin Inner BuildRight (10) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- BroadcastExchange (9) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet spark_catalog.default.household_demographics (5) + : : +- BroadcastExchange (16) + : : +- * Project (15) + : : +- * Filter (14) + : : +- * ColumnarToRow (13) + : : +- Scan parquet spark_catalog.default.time_dim (12) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet spark_catalog.default.web_page (19) + +- BroadcastExchange (49) + +- * HashAggregate (48) + +- Exchange (47) + +- * HashAggregate (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.web_sales (29) + : : +- ReusedExchange (33) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet spark_catalog.default.time_dim (36) + +- ReusedExchange (43) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(4) Project [codegen id : 4] +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, ws_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(7) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 6)) AND isnotnull(hd_demo_sk#5)) + +(8) Project [codegen id : 1] +Output [1]: [hd_demo_sk#5] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(9) BroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#5] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 4] +Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#5] + +(12) Scan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] + +(14) Filter [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(15) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] + +(16) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 4] +Output [1]: [ws_web_page_sk#3] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] + +(19) Scan parquet spark_catalog.default.web_page +Output [2]: [wp_web_page_sk#9, wp_char_count#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] + +(21) Filter [codegen id : 3] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Condition : (((isnotnull(wp_char_count#10) AND (wp_char_count#10 >= 5000)) AND (wp_char_count#10 <= 5200)) AND isnotnull(wp_web_page_sk#9)) + +(22) Project [codegen id : 3] +Output [1]: [wp_web_page_sk#9] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] + +(23) BroadcastExchange +Input [1]: [wp_web_page_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#9] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output: [] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#9] + +(26) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#11] +Results [1]: [count#12] + +(27) Exchange +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 10] +Input [1]: [count#12] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#13] +Results [1]: [count(1)#13 AS amc#14] + +(29) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] + +(31) Filter [codegen id : 8] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] +Condition : ((isnotnull(ws_ship_hdemo_sk#16) AND isnotnull(ws_sold_time_sk#15)) AND isnotnull(ws_web_page_sk#17)) + +(32) Project [codegen id : 8] +Output [3]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] + +(33) ReusedExchange [Reuses operator id: 9] +Output [1]: [hd_demo_sk#19] + +(34) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_ship_hdemo_sk#16] +Right keys [1]: [hd_demo_sk#19] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 8] +Output [2]: [ws_sold_time_sk#15, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, hd_demo_sk#19] + +(36) Scan parquet spark_catalog.default.time_dim +Output [2]: [t_time_sk#20, t_hour#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [t_time_sk#20, t_hour#21] + +(38) Filter [codegen id : 6] +Input [2]: [t_time_sk#20, t_hour#21] +Condition : (((isnotnull(t_hour#21) AND (t_hour#21 >= 19)) AND (t_hour#21 <= 20)) AND isnotnull(t_time_sk#20)) + +(39) Project [codegen id : 6] +Output [1]: [t_time_sk#20] +Input [2]: [t_time_sk#20, t_hour#21] + +(40) BroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_time_sk#15] +Right keys [1]: [t_time_sk#20] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 8] +Output [1]: [ws_web_page_sk#17] +Input [3]: [ws_sold_time_sk#15, ws_web_page_sk#17, t_time_sk#20] + +(43) ReusedExchange [Reuses operator id: 23] +Output [1]: [wp_web_page_sk#22] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_web_page_sk#17] +Right keys [1]: [wp_web_page_sk#22] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 8] +Output: [] +Input [2]: [ws_web_page_sk#17, wp_web_page_sk#22] + +(46) HashAggregate [codegen id : 8] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] + +(47) Exchange +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(48) HashAggregate [codegen id : 9] +Input [1]: [count#24] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS pmc#26] + +(49) BroadcastExchange +Input [1]: [pmc#26] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(50) BroadcastNestedLoopJoin [codegen id : 10] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 10] +Output [1]: [(cast(amc#14 as decimal(15,4)) / cast(pmc#26 as decimal(15,4))) AS am_pm_ratio#27] +Input [2]: [amc#14, pmc#26] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9cf499b546 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.native_iceberg_compat/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen (10) + Project [amc,pmc] + BroadcastNestedLoopJoin + HashAggregate [count] [count(1),amc,count] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (9) + HashAggregate [count] [count(1),pmc,count] + InputAdapter + Exchange #6 + WholeStageCodegen (8) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour] + InputAdapter + ReusedExchange [wp_web_page_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/explain.txt new file mode 100644 index 0000000000..1083b68c93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/explain.txt @@ -0,0 +1,271 @@ +== Physical Plan == +* Sort (47) ++- Exchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (9) + : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.call_center (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.catalog_returns (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet spark_catalog.default.customer (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet spark_catalog.default.customer_address (23) + : +- BroadcastExchange (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet spark_catalog.default.customer_demographics (30) + +- BroadcastExchange (40) + +- * Project (39) + +- * Filter (38) + +- * ColumnarToRow (37) + +- Scan parquet spark_catalog.default.household_demographics (36) + + +(1) Scan parquet spark_catalog.default.call_center +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] + +(3) Filter [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(4) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#8)] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(6) Filter [codegen id : 1] +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Condition : (isnotnull(cr_call_center_sk#6) AND isnotnull(cr_returning_customer_sk#5)) + +(7) BroadcastExchange +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cc_call_center_sk#1] +Right keys [1]: [cr_call_center_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 1998)) AND (d_moy#11 = 11)) AND isnotnull(d_date_sk#9)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returned_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 7] +Output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#9] + +(17) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(19) Filter [codegen id : 3] +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Condition : (((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#15)) AND isnotnull(c_current_cdemo_sk#13)) AND isnotnull(c_current_hdemo_sk#14)) + +(20) BroadcastExchange +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returning_customer_sk#5] +Right keys [1]: [c_customer_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(23) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_gmt_offset#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] + +(25) Filter [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Condition : ((isnotnull(ca_gmt_offset#17) AND (ca_gmt_offset#17 = -7.00)) AND isnotnull(ca_address_sk#16)) + +(26) Project [codegen id : 4] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#15] +Right keys [1]: [ca_address_sk#16] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15, ca_address_sk#16] + +(30) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown )),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree ))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(32) Filter [codegen id : 5] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Unknown )) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = Advanced Degree ))) AND isnotnull(cd_demo_sk#18)) + +(33) BroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#13] +Right keys [1]: [cd_demo_sk#18] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(36) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#21, hd_buy_potential#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] + +(38) Filter [codegen id : 6] +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Condition : ((isnotnull(hd_buy_potential#22) AND StartsWith(hd_buy_potential#22, Unknown)) AND isnotnull(hd_demo_sk#21)) + +(39) Project [codegen id : 6] +Output [1]: [hd_demo_sk#21] +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] + +(40) BroadcastExchange +Input [1]: [hd_demo_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_hdemo_sk#14] +Right keys [1]: [hd_demo_sk#21] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20, hd_demo_sk#21] + +(43) HashAggregate [codegen id : 7] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#7))] +Aggregate Attributes [1]: [sum#23] +Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] + +(44) Exchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 8] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [sum(UnscaledValue(cr_net_loss#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#7))#25] +Results [4]: [cc_call_center_id#2 AS Call_Center#26, cc_name#3 AS Call_Center_Name#27, cc_manager#4 AS Manager#28, MakeDecimal(sum(UnscaledValue(cr_net_loss#7))#25,17,2) AS Returns_Loss#29] + +(46) Exchange +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] +Arguments: rangepartitioning(Returns_Loss#29 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(47) Sort [codegen id : 9] +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] +Arguments: [Returns_Loss#29 DESC NULLS LAST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ac1b59c58b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (9) + Sort [Returns_Loss] + InputAdapter + Exchange [Returns_Loss] #1 + WholeStageCodegen (8) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum] [sum(UnscaledValue(cr_net_loss)),Call_Center,Call_Center_Name,Manager,Returns_Loss,sum] + InputAdapter + Exchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 + WholeStageCodegen (7) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk] + BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cr_call_center_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..1083b68c93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/explain.txt @@ -0,0 +1,271 @@ +== Physical Plan == +* Sort (47) ++- Exchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (9) + : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet spark_catalog.default.call_center (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.catalog_returns (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet spark_catalog.default.customer (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet spark_catalog.default.customer_address (23) + : +- BroadcastExchange (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet spark_catalog.default.customer_demographics (30) + +- BroadcastExchange (40) + +- * Project (39) + +- * Filter (38) + +- * ColumnarToRow (37) + +- Scan parquet spark_catalog.default.household_demographics (36) + + +(1) Scan parquet spark_catalog.default.call_center +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] + +(3) Filter [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(4) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#8)] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(6) Filter [codegen id : 1] +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Condition : (isnotnull(cr_call_center_sk#6) AND isnotnull(cr_returning_customer_sk#5)) + +(7) BroadcastExchange +Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cc_call_center_sk#1] +Right keys [1]: [cr_call_center_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8] +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#11)) AND (d_year#10 = 1998)) AND (d_moy#11 = 11)) AND isnotnull(d_date_sk#9)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returned_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 7] +Output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#9] + +(17) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(19) Filter [codegen id : 3] +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Condition : (((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#15)) AND isnotnull(c_current_cdemo_sk#13)) AND isnotnull(c_current_hdemo_sk#14)) + +(20) BroadcastExchange +Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returning_customer_sk#5] +Right keys [1]: [c_customer_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] + +(23) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_gmt_offset#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] + +(25) Filter [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] +Condition : ((isnotnull(ca_gmt_offset#17) AND (ca_gmt_offset#17 = -7.00)) AND isnotnull(ca_address_sk#16)) + +(26) Project [codegen id : 4] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_gmt_offset#17] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#15] +Right keys [1]: [ca_address_sk#16] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15, ca_address_sk#16] + +(30) Scan parquet spark_catalog.default.customer_demographics +Output [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown )),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree ))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(32) Filter [codegen id : 5] +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Condition : ((((cd_marital_status#19 = M) AND (cd_education_status#20 = Unknown )) OR ((cd_marital_status#19 = W) AND (cd_education_status#20 = Advanced Degree ))) AND isnotnull(cd_demo_sk#18)) + +(33) BroadcastExchange +Input [3]: [cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#13] +Right keys [1]: [cd_demo_sk#18] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, cd_demo_sk#18, cd_marital_status#19, cd_education_status#20] + +(36) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#21, hd_buy_potential#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] + +(38) Filter [codegen id : 6] +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] +Condition : ((isnotnull(hd_buy_potential#22) AND StartsWith(hd_buy_potential#22, Unknown)) AND isnotnull(hd_demo_sk#21)) + +(39) Project [codegen id : 6] +Output [1]: [hd_demo_sk#21] +Input [2]: [hd_demo_sk#21, hd_buy_potential#22] + +(40) BroadcastExchange +Input [1]: [hd_demo_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_hdemo_sk#14] +Right keys [1]: [hd_demo_sk#21] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#19, cd_education_status#20, hd_demo_sk#21] + +(43) HashAggregate [codegen id : 7] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#19, cd_education_status#20] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#7))] +Aggregate Attributes [1]: [sum#23] +Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] + +(44) Exchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 8] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20, sum#24] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#19, cd_education_status#20] +Functions [1]: [sum(UnscaledValue(cr_net_loss#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#7))#25] +Results [4]: [cc_call_center_id#2 AS Call_Center#26, cc_name#3 AS Call_Center_Name#27, cc_manager#4 AS Manager#28, MakeDecimal(sum(UnscaledValue(cr_net_loss#7))#25,17,2) AS Returns_Loss#29] + +(46) Exchange +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] +Arguments: rangepartitioning(Returns_Loss#29 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(47) Sort [codegen id : 9] +Input [4]: [Call_Center#26, Call_Center_Name#27, Manager#28, Returns_Loss#29] +Arguments: [Returns_Loss#29 DESC NULLS LAST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ac1b59c58b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (9) + Sort [Returns_Loss] + InputAdapter + Exchange [Returns_Loss] #1 + WholeStageCodegen (8) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum] [sum(UnscaledValue(cr_net_loss)),Call_Center,Call_Center_Name,Manager,Returns_Loss,sum] + InputAdapter + Exchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 + WholeStageCodegen (7) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss,cr_returned_date_sk] + BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cr_call_center_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss,cr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/explain.txt new file mode 100644 index 0000000000..3175a02337 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/explain.txt @@ -0,0 +1,197 @@ +== Physical Plan == +* HashAggregate (33) ++- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.web_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 6] +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Condition : (isnotnull(ws_item_sk#1) AND isnotnull(ws_ext_discount_amt#2)) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Input [4]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] + +(11) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#8)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] + +(13) Filter [codegen id : 3] +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Condition : isnotnull(ws_item_sk#6) + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(18) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Input [4]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8, d_date_sk#9] + +(21) HashAggregate [codegen id : 3] +Input [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Keys [1]: [ws_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#7))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [ws_item_sk#6, sum#13, count#14] + +(22) Exchange +Input [3]: [ws_item_sk#6, sum#13, count#14] +Arguments: hashpartitioning(ws_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) HashAggregate [codegen id : 4] +Input [3]: [ws_item_sk#6, sum#13, count#14] +Keys [1]: [ws_item_sk#6] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))#15] +Results [2]: [(1.3 * cast((avg(UnscaledValue(ws_ext_discount_amt#7))#15 / 100.0) as decimal(11,6))) AS (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] + +(24) Filter [codegen id : 4] +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#16) + +(25) BroadcastExchange +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [ws_item_sk#6] +Join type: Inner +Join condition: (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#16) + +(27) Project [codegen id : 6] +Output [2]: [ws_ext_discount_amt#2, ws_sold_date_sk#3] +Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4, (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#17] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [ws_ext_discount_amt#2] +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#17] + +(31) HashAggregate [codegen id : 6] +Input [1]: [ws_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] + +(32) Exchange +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#19] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#2))#20,17,2) AS Excess Discount Amount #21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6f9df5402f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_datafusion/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (7) + HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ws_ext_discount_amt] [sum,sum] + Project [ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(1.3 * avg(ws_ext_discount_amt))] + Project [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_ext_discount_amt] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Filter [(1.3 * avg(ws_ext_discount_amt))] + HashAggregate [ws_item_sk,sum,count] [avg(UnscaledValue(ws_ext_discount_amt)),(1.3 * avg(ws_ext_discount_amt)),sum,count] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,ws_ext_discount_amt] [sum,count,sum,count] + Project [ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3175a02337 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/explain.txt @@ -0,0 +1,197 @@ +== Physical Plan == +* HashAggregate (33) ++- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.web_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 6] +Input [3]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3] +Condition : (isnotnull(ws_item_sk#1) AND isnotnull(ws_ext_discount_amt#2)) + +(4) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] +Input [4]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4] + +(11) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#8)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] + +(13) Filter [codegen id : 3] +Input [3]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8] +Condition : isnotnull(ws_item_sk#6) + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#9, d_date#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-01-27)) AND (d_date#10 <= 2000-04-26)) AND isnotnull(d_date_sk#9)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_date#10] + +(18) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#8] +Right keys [1]: [d_date_sk#9] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Input [4]: [ws_item_sk#6, ws_ext_discount_amt#7, ws_sold_date_sk#8, d_date_sk#9] + +(21) HashAggregate [codegen id : 3] +Input [2]: [ws_item_sk#6, ws_ext_discount_amt#7] +Keys [1]: [ws_item_sk#6] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#7))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [ws_item_sk#6, sum#13, count#14] + +(22) Exchange +Input [3]: [ws_item_sk#6, sum#13, count#14] +Arguments: hashpartitioning(ws_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) HashAggregate [codegen id : 4] +Input [3]: [ws_item_sk#6, sum#13, count#14] +Keys [1]: [ws_item_sk#6] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#7))#15] +Results [2]: [(1.3 * cast((avg(UnscaledValue(ws_ext_discount_amt#7))#15 / 100.0) as decimal(11,6))) AS (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] + +(24) Filter [codegen id : 4] +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#16) + +(25) BroadcastExchange +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [ws_item_sk#6] +Join type: Inner +Join condition: (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#16) + +(27) Project [codegen id : 6] +Output [2]: [ws_ext_discount_amt#2, ws_sold_date_sk#3] +Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#4, (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#6] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#17] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#17] +Join type: Inner +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [ws_ext_discount_amt#2] +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#17] + +(31) HashAggregate [codegen id : 6] +Input [1]: [ws_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] + +(32) Exchange +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#19] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#2))#20,17,2) AS Excess Discount Amount #21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6f9df5402f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.native_iceberg_compat/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen (7) + HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [ws_ext_discount_amt] [sum,sum] + Project [ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(1.3 * avg(ws_ext_discount_amt))] + Project [ws_ext_discount_amt,ws_sold_date_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_ext_discount_amt] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (4) + Filter [(1.3 * avg(ws_ext_discount_amt))] + HashAggregate [ws_item_sk,sum,count] [avg(UnscaledValue(ws_ext_discount_amt)),(1.3 * avg(ws_ext_discount_amt)),sum,count] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,ws_ext_discount_amt] [sum,count,sum,count] + Project [ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/explain.txt new file mode 100644 index 0000000000..172c035654 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/explain.txt @@ -0,0 +1,138 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Project (13) + : +- * SortMergeJoin Inner (12) + : :- * Sort (5) + : : +- Exchange (4) + : : +- * Project (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- * Sort (11) + : +- Exchange (10) + : +- * Project (9) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet spark_catalog.default.store_returns (6) + +- BroadcastExchange (18) + +- * Project (17) + +- * Filter (16) + +- * ColumnarToRow (15) + +- Scan parquet spark_catalog.default.reason (14) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] + +(3) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] + +(4) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) Filter [codegen id : 3] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_item_sk#7) AND isnotnull(sr_ticket_number#9)) AND isnotnull(sr_reason_sk#8)) + +(9) Project [codegen id : 3] +Output [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(10) Exchange +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: hashpartitioning(sr_item_sk#7, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [sr_item_sk#7 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 6] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#3] +Right keys [2]: [sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 6] +Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] + +(14) Scan parquet spark_catalog.default.reason +Output [2]: [r_reason_sk#12, r_reason_desc#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28 ), IsNotNull(r_reason_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [r_reason_sk#12, r_reason_desc#13] + +(16) Filter [codegen id : 5] +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Condition : ((isnotnull(r_reason_desc#13) AND (r_reason_desc#13 = reason 28 )) AND isnotnull(r_reason_sk#12)) + +(17) Project [codegen id : 5] +Output [1]: [r_reason_sk#12] +Input [2]: [r_reason_sk#12, r_reason_desc#13] + +(18) BroadcastExchange +Input [1]: [r_reason_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_reason_sk#8] +Right keys [1]: [r_reason_sk#12] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 6] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#10) THEN (cast((ss_quantity#4 - sr_return_quantity#10) as decimal(10,0)) * ss_sales_price#5) ELSE (cast(ss_quantity#4 as decimal(10,0)) * ss_sales_price#5) END AS act_sales#14] +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10, r_reason_sk#12] + +(21) HashAggregate [codegen id : 6] +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] + +(22) Exchange +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(23) HashAggregate [codegen id : 7] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#19] +Results [2]: [ss_customer_sk#2, sum(act_sales#14)#19 AS sumsales#20] + +(24) TakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#20] +Arguments: 100, [sumsales#20 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/simplified.txt new file mode 100644 index 0000000000..350956593a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [sumsales,ss_customer_sk] + WholeStageCodegen (7) + HashAggregate [ss_customer_sk,sum,isEmpty] [sum(act_sales),sumsales,sum,isEmpty] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [ss_customer_sk,act_sales] [sum,isEmpty,sum,isEmpty] + Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_reason_sk,r_reason_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #3 + WholeStageCodegen (3) + Project [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [r_reason_sk] + Filter [r_reason_desc,r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.reason [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..172c035654 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/explain.txt @@ -0,0 +1,138 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Project (13) + : +- * SortMergeJoin Inner (12) + : :- * Sort (5) + : : +- Exchange (4) + : : +- * Project (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- * Sort (11) + : +- Exchange (10) + : +- * Project (9) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet spark_catalog.default.store_returns (6) + +- BroadcastExchange (18) + +- * Project (17) + +- * Filter (16) + +- * ColumnarToRow (15) + +- Scan parquet spark_catalog.default.reason (14) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] + +(3) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, ss_sold_date_sk#6] + +(4) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#3 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(8) Filter [codegen id : 3] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_item_sk#7) AND isnotnull(sr_ticket_number#9)) AND isnotnull(sr_reason_sk#8)) + +(9) Project [codegen id : 3] +Output [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] + +(10) Exchange +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: hashpartitioning(sr_item_sk#7, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [sr_item_sk#7 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 6] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#3] +Right keys [2]: [sr_item_sk#7, sr_ticket_number#9] +Join type: Inner +Join condition: None + +(13) Project [codegen id : 6] +Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] + +(14) Scan parquet spark_catalog.default.reason +Output [2]: [r_reason_sk#12, r_reason_desc#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28 ), IsNotNull(r_reason_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [r_reason_sk#12, r_reason_desc#13] + +(16) Filter [codegen id : 5] +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Condition : ((isnotnull(r_reason_desc#13) AND (r_reason_desc#13 = reason 28 )) AND isnotnull(r_reason_sk#12)) + +(17) Project [codegen id : 5] +Output [1]: [r_reason_sk#12] +Input [2]: [r_reason_sk#12, r_reason_desc#13] + +(18) BroadcastExchange +Input [1]: [r_reason_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_reason_sk#8] +Right keys [1]: [r_reason_sk#12] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 6] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#10) THEN (cast((ss_quantity#4 - sr_return_quantity#10) as decimal(10,0)) * ss_sales_price#5) ELSE (cast(ss_quantity#4 as decimal(10,0)) * ss_sales_price#5) END AS act_sales#14] +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10, r_reason_sk#12] + +(21) HashAggregate [codegen id : 6] +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] + +(22) Exchange +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(23) HashAggregate [codegen id : 7] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#19] +Results [2]: [ss_customer_sk#2, sum(act_sales#14)#19 AS sumsales#20] + +(24) TakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#20] +Arguments: 100, [sumsales#20 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#20] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..350956593a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [sumsales,ss_customer_sk] + WholeStageCodegen (7) + HashAggregate [ss_customer_sk,sum,isEmpty] [sum(act_sales),sumsales,sum,isEmpty] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [ss_customer_sk,act_sales] [sum,isEmpty,sum,isEmpty] + Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_reason_sk,r_reason_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #3 + WholeStageCodegen (3) + Project [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [r_reason_sk] + Filter [r_reason_desc,r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.reason [r_reason_sk,r_reason_desc] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/explain.txt new file mode 100644 index 0000000000..91592f54ab --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +* HashAggregate (45) ++- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * SortMergeJoin LeftAnti (19) + : : : :- * Project (13) + : : : : +- * SortMergeJoin LeftSemi (12) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.web_sales (7) + : : : +- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet spark_catalog.default.web_returns (14) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet spark_catalog.default.date_dim (20) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet spark_catalog.default.customer_address (27) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.web_site (34) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] + +(3) Filter [codegen id : 1] +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Project [codegen id : 1] +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] + +(5) Exchange +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_order_number#5 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] + +(9) Project [codegen id : 3] +Output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] + +(10) Exchange +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 5] +Left keys [1]: [ws_order_number#5] +Right keys [1]: [ws_order_number#10] +Join type: LeftSemi +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) + +(13) Project [codegen id : 5] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(14) Scan parquet spark_catalog.default.web_returns +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 6] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] + +(16) Project [codegen id : 6] +Output [1]: [wr_order_number#12] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] + +(17) Exchange +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 7] +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin [codegen id : 11] +Left keys [1]: [ws_order_number#5] +Right keys [1]: [wr_order_number#12] +Join type: LeftAnti +Join condition: None + +(20) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#14, d_date#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] + +(22) Filter [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 1999-02-01)) AND (d_date#15 <= 1999-04-02)) AND isnotnull(d_date_sk#14)) + +(23) Project [codegen id : 8] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#15] + +(24) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 11] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#14] + +(27) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] + +(29) Filter [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(30) Project [codegen id : 9] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(31) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 11] +Output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] + +(34) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#18, web_company_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [web_site_sk#18, web_company_name#19] + +(36) Filter [codegen id : 10] +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri )) AND isnotnull(web_site_sk#18)) + +(37) Project [codegen id : 10] +Output [1]: [web_site_sk#18] +Input [2]: [web_site_sk#18, web_company_name#19] + +(38) BroadcastExchange +Input [1]: [web_site_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#18] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 11] +Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] + +(41) HashAggregate [codegen id : 11] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] + +(42) HashAggregate [codegen id : 11] +Input [3]: [ws_order_number#5, sum#22, sum#23] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] + +(43) HashAggregate [codegen id : 11] +Input [3]: [ws_order_number#5, sum#22, sum#23] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] + +(44) Exchange +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 12] +Input [3]: [sum#22, sum#23, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#21,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/simplified.txt new file mode 100644 index 0000000000..86981cc808 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_datafusion/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen (12) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (11) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + SortMergeJoin [ws_order_number,wr_order_number] + InputAdapter + WholeStageCodegen (5) + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (1) + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #3 + WholeStageCodegen (3) + Project [ws_warehouse_sk,ws_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [wr_order_number] + InputAdapter + Exchange [wr_order_number] #4 + WholeStageCodegen (6) + Project [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_order_number,wr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..91592f54ab --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/explain.txt @@ -0,0 +1,260 @@ +== Physical Plan == +* HashAggregate (45) ++- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * SortMergeJoin LeftAnti (19) + : : : :- * Project (13) + : : : : +- * SortMergeJoin LeftSemi (12) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- * Sort (11) + : : : : +- Exchange (10) + : : : : +- * Project (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.web_sales (7) + : : : +- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet spark_catalog.default.web_returns (14) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- * ColumnarToRow (21) + : : +- Scan parquet spark_catalog.default.date_dim (20) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet spark_catalog.default.customer_address (27) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.web_site (34) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] + +(3) Filter [codegen id : 1] +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Project [codegen id : 1] +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ws_sold_date_sk#8] + +(5) Exchange +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Arguments: [ws_order_number#5 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] + +(9) Project [codegen id : 3] +Output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] + +(10) Exchange +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 5] +Left keys [1]: [ws_order_number#5] +Right keys [1]: [ws_order_number#10] +Join type: LeftSemi +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) + +(13) Project [codegen id : 5] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(14) Scan parquet spark_catalog.default.web_returns +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 6] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] + +(16) Project [codegen id : 6] +Output [1]: [wr_order_number#12] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] + +(17) Exchange +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 7] +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12 ASC NULLS FIRST], false, 0 + +(19) SortMergeJoin [codegen id : 11] +Left keys [1]: [ws_order_number#5] +Right keys [1]: [wr_order_number#12] +Join type: LeftAnti +Join condition: None + +(20) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#14, d_date#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] + +(22) Filter [codegen id : 8] +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 1999-02-01)) AND (d_date#15 <= 1999-04-02)) AND isnotnull(d_date_sk#14)) + +(23) Project [codegen id : 8] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#15] + +(24) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 11] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#14] + +(27) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] + +(29) Filter [codegen id : 9] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(30) Project [codegen id : 9] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(31) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 11] +Output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] + +(34) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#18, web_company_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 10] +Input [2]: [web_site_sk#18, web_company_name#19] + +(36) Filter [codegen id : 10] +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri )) AND isnotnull(web_site_sk#18)) + +(37) Project [codegen id : 10] +Output [1]: [web_site_sk#18] +Input [2]: [web_site_sk#18, web_company_name#19] + +(38) BroadcastExchange +Input [1]: [web_site_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#18] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 11] +Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] + +(41) HashAggregate [codegen id : 11] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] + +(42) HashAggregate [codegen id : 11] +Input [3]: [ws_order_number#5, sum#22, sum#23] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] + +(43) HashAggregate [codegen id : 11] +Input [3]: [ws_order_number#5, sum#22, sum#23] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] + +(44) Exchange +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 12] +Input [3]: [sum#22, sum#23, count#25] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#21,17,2) AS total net profit #28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..86981cc808 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.native_iceberg_compat/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen (12) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (11) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + SortMergeJoin [ws_order_number,wr_order_number] + InputAdapter + WholeStageCodegen (5) + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (1) + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #3 + WholeStageCodegen (3) + Project [ws_warehouse_sk,ws_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [wr_order_number] + InputAdapter + Exchange [wr_order_number] #4 + WholeStageCodegen (6) + Project [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_order_number,wr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (9) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/explain.txt new file mode 100644 index 0000000000..194134249e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/explain.txt @@ -0,0 +1,330 @@ +== Physical Plan == +* HashAggregate (58) ++- Exchange (57) + +- * HashAggregate (56) + +- * HashAggregate (55) + +- * HashAggregate (54) + +- * Project (53) + +- * BroadcastHashJoin Inner BuildRight (52) + :- * Project (46) + : +- * BroadcastHashJoin Inner BuildRight (45) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * SortMergeJoin LeftSemi (32) + : : : :- * SortMergeJoin LeftSemi (17) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- * Project (16) + : : : : +- * SortMergeJoin Inner (15) + : : : : :- * Sort (12) + : : : : : +- Exchange (11) + : : : : : +- * Project (10) + : : : : : +- * Filter (9) + : : : : : +- * ColumnarToRow (8) + : : : : : +- Scan parquet spark_catalog.default.web_sales (7) + : : : : +- * Sort (14) + : : : : +- ReusedExchange (13) + : : : +- * Project (31) + : : : +- * SortMergeJoin Inner (30) + : : : :- * Sort (23) + : : : : +- Exchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet spark_catalog.default.web_returns (18) + : : : +- * Project (29) + : : : +- * SortMergeJoin Inner (28) + : : : :- * Sort (25) + : : : : +- ReusedExchange (24) + : : : +- * Sort (27) + : : : +- ReusedExchange (26) + : : +- BroadcastExchange (37) + : : +- * Project (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet spark_catalog.default.date_dim (33) + : +- BroadcastExchange (44) + : +- * Project (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet spark_catalog.default.customer_address (40) + +- BroadcastExchange (51) + +- * Project (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet spark_catalog.default.web_site (47) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Project [codegen id : 1] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] + +(5) Exchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] + +(9) Filter [codegen id : 3] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(10) Project [codegen id : 3] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] + +(11) Exchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 + +(13) ReusedExchange [Reuses operator id: 11] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(14) Sort [codegen id : 6] +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 + +(15) SortMergeJoin [codegen id : 7] +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join type: Inner +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(16) Project [codegen id : 7] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] + +(17) SortMergeJoin [codegen id : 8] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#9] +Join type: LeftSemi +Join condition: None + +(18) Scan parquet spark_catalog.default.web_returns +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 9] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] + +(20) Filter [codegen id : 9] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) + +(21) Project [codegen id : 9] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] + +(22) Exchange +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) Sort [codegen id : 10] +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13 ASC NULLS FIRST], false, 0 + +(24) ReusedExchange [Reuses operator id: 11] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] + +(25) Sort [codegen id : 12] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 + +(26) ReusedExchange [Reuses operator id: 11] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(27) Sort [codegen id : 14] +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 + +(28) SortMergeJoin [codegen id : 15] +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join type: Inner +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(29) Project [codegen id : 15] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] + +(30) SortMergeJoin [codegen id : 16] +Left keys [1]: [wr_order_number#13] +Right keys [1]: [ws_order_number#9] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 16] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, ws_order_number#9] + +(32) SortMergeJoin [codegen id : 20] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [wr_order_number#13] +Join type: LeftSemi +Join condition: None + +(33) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 17] +Input [2]: [d_date_sk#15, d_date#16] + +(35) Filter [codegen id : 17] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 1999-02-01)) AND (d_date#16 <= 1999-04-02)) AND isnotnull(d_date_sk#15)) + +(36) Project [codegen id : 17] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_date#16] + +(37) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(38) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 20] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#15] + +(40) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 18] +Input [2]: [ca_address_sk#17, ca_state#18] + +(42) Filter [codegen id : 18] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : ((isnotnull(ca_state#18) AND (ca_state#18 = IL)) AND isnotnull(ca_address_sk#17)) + +(43) Project [codegen id : 18] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_state#18] + +(44) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 20] +Output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#17] + +(47) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#19, web_company_name#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 19] +Input [2]: [web_site_sk#19, web_company_name#20] + +(49) Filter [codegen id : 19] +Input [2]: [web_site_sk#19, web_company_name#20] +Condition : ((isnotnull(web_company_name#20) AND (web_company_name#20 = pri )) AND isnotnull(web_site_sk#19)) + +(50) Project [codegen id : 19] +Output [1]: [web_site_sk#19] +Input [2]: [web_site_sk#19, web_company_name#20] + +(51) BroadcastExchange +Input [1]: [web_site_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(52) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#19] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 20] +Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#19] + +(54) HashAggregate [codegen id : 20] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] + +(55) HashAggregate [codegen id : 20] +Input [3]: [ws_order_number#4, sum#23, sum#24] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] + +(56) HashAggregate [codegen id : 20] +Input [3]: [ws_order_number#4, sum#23, sum#24] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [sum#23, sum#24, count#26] + +(57) Exchange +Input [3]: [sum#23, sum#24, count#26] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(58) HashAggregate [codegen id : 21] +Input [3]: [sum#23, sum#24, count#26] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [count(ws_order_number#4)#25 AS order count #27, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#21,17,2) AS total shipping cost #28, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#22,17,2) AS total net profit #29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f628b29094 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_datafusion/simplified.txt @@ -0,0 +1,102 @@ +WholeStageCodegen (21) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (20) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + SortMergeJoin [ws_order_number,wr_order_number] + InputAdapter + WholeStageCodegen (8) + SortMergeJoin [ws_order_number,ws_order_number] + InputAdapter + WholeStageCodegen (2) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (1) + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #3 + WholeStageCodegen (3) + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + InputAdapter + WholeStageCodegen (16) + Project [wr_order_number] + SortMergeJoin [wr_order_number,ws_order_number] + InputAdapter + WholeStageCodegen (10) + Sort [wr_order_number] + InputAdapter + Exchange [wr_order_number] #4 + WholeStageCodegen (9) + Project [wr_order_number] + Filter [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_order_number,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (15) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + InputAdapter + WholeStageCodegen (14) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (17) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (18) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (19) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..194134249e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/explain.txt @@ -0,0 +1,330 @@ +== Physical Plan == +* HashAggregate (58) ++- Exchange (57) + +- * HashAggregate (56) + +- * HashAggregate (55) + +- * HashAggregate (54) + +- * Project (53) + +- * BroadcastHashJoin Inner BuildRight (52) + :- * Project (46) + : +- * BroadcastHashJoin Inner BuildRight (45) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * SortMergeJoin LeftSemi (32) + : : : :- * SortMergeJoin LeftSemi (17) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : : +- * Project (16) + : : : : +- * SortMergeJoin Inner (15) + : : : : :- * Sort (12) + : : : : : +- Exchange (11) + : : : : : +- * Project (10) + : : : : : +- * Filter (9) + : : : : : +- * ColumnarToRow (8) + : : : : : +- Scan parquet spark_catalog.default.web_sales (7) + : : : : +- * Sort (14) + : : : : +- ReusedExchange (13) + : : : +- * Project (31) + : : : +- * SortMergeJoin Inner (30) + : : : :- * Sort (23) + : : : : +- Exchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet spark_catalog.default.web_returns (18) + : : : +- * Project (29) + : : : +- * SortMergeJoin Inner (28) + : : : :- * Sort (25) + : : : : +- ReusedExchange (24) + : : : +- * Sort (27) + : : : +- ReusedExchange (26) + : : +- BroadcastExchange (37) + : : +- * Project (36) + : : +- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet spark_catalog.default.date_dim (33) + : +- BroadcastExchange (44) + : +- * Project (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet spark_catalog.default.customer_address (40) + +- BroadcastExchange (51) + +- * Project (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet spark_catalog.default.web_site (47) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Project [codegen id : 1] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ws_sold_date_sk#7] + +(5) Exchange +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] + +(9) Filter [codegen id : 3] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(10) Project [codegen id : 3] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] + +(11) Exchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 + +(13) ReusedExchange [Reuses operator id: 11] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(14) Sort [codegen id : 6] +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 + +(15) SortMergeJoin [codegen id : 7] +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join type: Inner +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(16) Project [codegen id : 7] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] + +(17) SortMergeJoin [codegen id : 8] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#9] +Join type: LeftSemi +Join condition: None + +(18) Scan parquet spark_catalog.default.web_returns +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 9] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] + +(20) Filter [codegen id : 9] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) + +(21) Project [codegen id : 9] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] + +(22) Exchange +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(23) Sort [codegen id : 10] +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13 ASC NULLS FIRST], false, 0 + +(24) ReusedExchange [Reuses operator id: 11] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] + +(25) Sort [codegen id : 12] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 + +(26) ReusedExchange [Reuses operator id: 11] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] + +(27) Sort [codegen id : 14] +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 + +(28) SortMergeJoin [codegen id : 15] +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join type: Inner +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) + +(29) Project [codegen id : 15] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] + +(30) SortMergeJoin [codegen id : 16] +Left keys [1]: [wr_order_number#13] +Right keys [1]: [ws_order_number#9] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 16] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, ws_order_number#9] + +(32) SortMergeJoin [codegen id : 20] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [wr_order_number#13] +Join type: LeftSemi +Join condition: None + +(33) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#15, d_date#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 17] +Input [2]: [d_date_sk#15, d_date#16] + +(35) Filter [codegen id : 17] +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 1999-02-01)) AND (d_date#16 <= 1999-04-02)) AND isnotnull(d_date_sk#15)) + +(36) Project [codegen id : 17] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_date#16] + +(37) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(38) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 20] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#15] + +(40) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 18] +Input [2]: [ca_address_sk#17, ca_state#18] + +(42) Filter [codegen id : 18] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : ((isnotnull(ca_state#18) AND (ca_state#18 = IL)) AND isnotnull(ca_address_sk#17)) + +(43) Project [codegen id : 18] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_state#18] + +(44) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 20] +Output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#17] + +(47) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#19, web_company_name#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 19] +Input [2]: [web_site_sk#19, web_company_name#20] + +(49) Filter [codegen id : 19] +Input [2]: [web_site_sk#19, web_company_name#20] +Condition : ((isnotnull(web_company_name#20) AND (web_company_name#20 = pri )) AND isnotnull(web_site_sk#19)) + +(50) Project [codegen id : 19] +Output [1]: [web_site_sk#19] +Input [2]: [web_site_sk#19, web_company_name#20] + +(51) BroadcastExchange +Input [1]: [web_site_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(52) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#19] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 20] +Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#19] + +(54) HashAggregate [codegen id : 20] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] + +(55) HashAggregate [codegen id : 20] +Input [3]: [ws_order_number#4, sum#23, sum#24] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] + +(56) HashAggregate [codegen id : 20] +Input [3]: [ws_order_number#4, sum#23, sum#24] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [sum#23, sum#24, count#26] + +(57) Exchange +Input [3]: [sum#23, sum#24, count#26] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(58) HashAggregate [codegen id : 21] +Input [3]: [sum#23, sum#24, count#26] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [count(ws_order_number#4)#25 AS order count #27, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#21,17,2) AS total shipping cost #28, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#22,17,2) AS total net profit #29] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f628b29094 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.native_iceberg_compat/simplified.txt @@ -0,0 +1,102 @@ +WholeStageCodegen (21) + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (20) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + SortMergeJoin [ws_order_number,wr_order_number] + InputAdapter + WholeStageCodegen (8) + SortMergeJoin [ws_order_number,ws_order_number] + InputAdapter + WholeStageCodegen (2) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen (1) + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #3 + WholeStageCodegen (3) + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_warehouse_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (6) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + InputAdapter + WholeStageCodegen (16) + Project [wr_order_number] + SortMergeJoin [wr_order_number,ws_order_number] + InputAdapter + WholeStageCodegen (10) + Sort [wr_order_number] + InputAdapter + Exchange [wr_order_number] #4 + WholeStageCodegen (9) + Project [wr_order_number] + Filter [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_order_number,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (15) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + InputAdapter + WholeStageCodegen (14) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (17) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (18) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (19) + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/explain.txt new file mode 100644 index 0000000000..067597a654 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/explain.txt @@ -0,0 +1,163 @@ +== Physical Plan == +* HashAggregate (28) ++- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.household_demographics (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.time_dim (12) + +- BroadcastExchange (23) + +- * Project (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet spark_catalog.default.store (19) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Project [codegen id : 4] +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(7) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 7)) AND isnotnull(hd_demo_sk#5)) + +(8) Project [codegen id : 1] +Output [1]: [hd_demo_sk#5] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(9) BroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#5] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] + +(12) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(14) Filter [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(15) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(16) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] + +(19) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#10, s_store_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#10, s_store_name#11] + +(21) Filter [codegen id : 3] +Input [2]: [s_store_sk#10, s_store_name#11] +Condition : ((isnotnull(s_store_name#11) AND (s_store_name#11 = ese)) AND isnotnull(s_store_sk#10)) + +(22) Project [codegen id : 3] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_store_name#11] + +(23) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#10] + +(26) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#12] +Results [1]: [count#13] + +(27) Exchange +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 5] +Input [1]: [count#13] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#14] +Results [1]: [count(1)#14 AS count(1)#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9f6ea6e9b4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_datafusion/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (5) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..067597a654 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/explain.txt @@ -0,0 +1,163 @@ +== Physical Plan == +* HashAggregate (28) ++- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet spark_catalog.default.household_demographics (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.time_dim (12) + +- BroadcastExchange (23) + +- * Project (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet spark_catalog.default.store (19) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Project [codegen id : 4] +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#5, hd_dep_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(7) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] +Condition : ((isnotnull(hd_dep_count#6) AND (hd_dep_count#6 = 7)) AND isnotnull(hd_demo_sk#5)) + +(8) Project [codegen id : 1] +Output [1]: [hd_demo_sk#5] +Input [2]: [hd_demo_sk#5, hd_dep_count#6] + +(9) BroadcastExchange +Input [1]: [hd_demo_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#5] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] + +(12) Scan parquet spark_catalog.default.time_dim +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(14) Filter [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(15) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(16) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] + +(19) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#10, s_store_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#10, s_store_name#11] + +(21) Filter [codegen id : 3] +Input [2]: [s_store_sk#10, s_store_name#11] +Condition : ((isnotnull(s_store_name#11) AND (s_store_name#11 = ese)) AND isnotnull(s_store_sk#10)) + +(22) Project [codegen id : 3] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_store_name#11] + +(23) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#10] + +(26) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#12] +Results [1]: [count#13] + +(27) Exchange +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(28) HashAggregate [codegen id : 5] +Input [1]: [count#13] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#14] +Results [1]: [count(1)#14 AS count(1)#15] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9f6ea6e9b4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.native_iceberg_compat/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (5) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.time_dim [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/explain.txt new file mode 100644 index 0000000000..235e92c3ed --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/explain.txt @@ -0,0 +1,167 @@ +== Physical Plan == +* HashAggregate (27) ++- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * SortMergeJoin FullOuter (23) + :- * Sort (13) + : +- * HashAggregate (12) + : +- Exchange (11) + : +- * HashAggregate (10) + : +- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Project (6) + : +- * Filter (5) + : +- * ColumnarToRow (4) + : +- Scan parquet spark_catalog.default.date_dim (3) + +- * Sort (22) + +- * HashAggregate (21) + +- Exchange (20) + +- * HashAggregate (19) + +- * Project (18) + +- * BroadcastHashJoin Inner BuildRight (17) + :- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.catalog_sales (14) + +- ReusedExchange (16) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] + +(3) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(5) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(6) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(7) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 2] +Output [2]: [ss_item_sk#1, ss_customer_sk#2] +Input [4]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#4] + +(10) HashAggregate [codegen id : 2] +Input [2]: [ss_item_sk#1, ss_customer_sk#2] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#2, ss_item_sk#1] + +(11) Exchange +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#2 AS customer_sk#6, ss_item_sk#1 AS item_sk#7] + +(13) Sort [codegen id : 3] +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST], false, 0 + +(14) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] + +(16) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(17) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] + +(19) HashAggregate [codegen id : 5] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#8, cs_item_sk#9] + +(20) Exchange +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 6] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#8 AS customer_sk#12, cs_item_sk#9 AS item_sk#13] + +(22) Sort [codegen id : 6] +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 7] +Left keys [2]: [customer_sk#6, item_sk#7] +Right keys [2]: [customer_sk#12, item_sk#13] +Join type: FullOuter +Join condition: None + +(24) Project [codegen id : 7] +Output [2]: [customer_sk#6, customer_sk#12] +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] + +(25) HashAggregate [codegen id : 7] +Input [2]: [customer_sk#6, customer_sk#12] +Keys: [] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [3]: [sum#17, sum#18, sum#19] + +(26) Exchange +Input [3]: [sum#17, sum#18, sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 8] +Input [3]: [sum#17, sum#18, sum#19] +Keys: [] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20 AS store_only#23, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21 AS catalog_only#24, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22 AS store_and_catalog#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/simplified.txt new file mode 100644 index 0000000000..cc1e94c5a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_datafusion/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..235e92c3ed --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/explain.txt @@ -0,0 +1,167 @@ +== Physical Plan == +* HashAggregate (27) ++- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * SortMergeJoin FullOuter (23) + :- * Sort (13) + : +- * HashAggregate (12) + : +- Exchange (11) + : +- * HashAggregate (10) + : +- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Project (6) + : +- * Filter (5) + : +- * ColumnarToRow (4) + : +- Scan parquet spark_catalog.default.date_dim (3) + +- * Sort (22) + +- * HashAggregate (21) + +- Exchange (20) + +- * HashAggregate (19) + +- * Project (18) + +- * BroadcastHashJoin Inner BuildRight (17) + :- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.catalog_sales (14) + +- ReusedExchange (16) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3] + +(3) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(5) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(6) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(7) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 2] +Output [2]: [ss_item_sk#1, ss_customer_sk#2] +Input [4]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#4] + +(10) HashAggregate [codegen id : 2] +Input [2]: [ss_item_sk#1, ss_customer_sk#2] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#2, ss_item_sk#1] + +(11) Exchange +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#2, ss_item_sk#1] +Keys [2]: [ss_customer_sk#2, ss_item_sk#1] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#2 AS customer_sk#6, ss_item_sk#1 AS item_sk#7] + +(13) Sort [codegen id : 3] +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST], false, 0 + +(14) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#10)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] + +(16) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(17) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] + +(19) HashAggregate [codegen id : 5] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#8, cs_item_sk#9] + +(20) Exchange +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 6] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#8 AS customer_sk#12, cs_item_sk#9 AS item_sk#13] + +(22) Sort [codegen id : 6] +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST], false, 0 + +(23) SortMergeJoin [codegen id : 7] +Left keys [2]: [customer_sk#6, item_sk#7] +Right keys [2]: [customer_sk#12, item_sk#13] +Join type: FullOuter +Join condition: None + +(24) Project [codegen id : 7] +Output [2]: [customer_sk#6, customer_sk#12] +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] + +(25) HashAggregate [codegen id : 7] +Input [2]: [customer_sk#6, customer_sk#12] +Keys: [] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [3]: [sum#17, sum#18, sum#19] + +(26) Exchange +Input [3]: [sum#17, sum#18, sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 8] +Input [3]: [sum#17, sum#18, sum#19] +Keys: [] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20 AS store_only#23, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21 AS catalog_only#24, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22 AS store_and_catalog#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..cc1e94c5a1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.native_iceberg_compat/simplified.txt @@ -0,0 +1,44 @@ +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/explain.txt new file mode 100644 index 0000000000..340150a42d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/explain.txt @@ -0,0 +1,150 @@ +== Physical Plan == +* Project (26) ++- * Sort (25) + +- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(20) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(24) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], true, 0 + +(26) Project [codegen id : 7] +Output [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b831f96b22 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_datafusion/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (7) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_id,i_item_desc,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..340150a42d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/explain.txt @@ -0,0 +1,150 @@ +== Physical Plan == +* Project (26) ++- * Sort (25) + +- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, i_item_id#5] + +(20) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18, i_item_id#5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, i_item_id#5, _we0#17] + +(24) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], true, 0 + +(26) Project [codegen id : 7] +Output [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18, i_item_id#5] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..b831f96b22 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.native_iceberg_compat/simplified.txt @@ -0,0 +1,42 @@ +WholeStageCodegen (7) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_id,i_item_desc,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/explain.txt new file mode 100644 index 0000000000..18c9bd3425 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/explain.txt @@ -0,0 +1,187 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.call_center (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet spark_catalog.default.date_dim (22) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#4) AND isnotnull(cs_ship_mode_sk#3)) AND isnotnull(cs_call_center_sk#2)) AND isnotnull(cs_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#4] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_type#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] + +(16) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#10, cc_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#10, cc_name#11] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#10, cc_name#11] +Condition : isnotnull(cc_call_center_sk#10) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_call_center_sk#2] +Right keys [1]: [cc_call_center_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_call_center_sk#10, cc_name#11] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_month_seq#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#12] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(26) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] +Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11, d_date_sk#12] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] + +(30) Exchange +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 6] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/simplified.txt new file mode 100644 index 0000000000..db126ff908 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [_groupingexpression,sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [_groupingexpression,sm_type,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [_groupingexpression,sm_type,cc_name,cs_ship_date_sk,cs_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,cc_name,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..18c9bd3425 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/explain.txt @@ -0,0 +1,187 @@ +== Physical Plan == +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.call_center (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet spark_catalog.default.date_dim (22) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#4) AND isnotnull(cs_ship_mode_sk#3)) AND isnotnull(cs_call_center_sk#2)) AND isnotnull(cs_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#4] +Right keys [1]: [w_warehouse_sk#6] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet spark_catalog.default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_type#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] + +(16) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#10, cc_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#10, cc_name#11] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#10, cc_name#11] +Condition : isnotnull(cc_call_center_sk#10) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_call_center_sk#2] +Right keys [1]: [cc_call_center_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_call_center_sk#10, cc_name#11] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_month_seq#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#12] +Input [2]: [d_date_sk#12, d_month_seq#13] + +(26) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] +Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11, d_date_sk#12] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] + +(30) Exchange +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 6] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..db126ff908 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [_groupingexpression,sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + InputAdapter + Exchange [_groupingexpression,sm_type,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [_groupingexpression,sm_type,cc_name,cs_ship_date_sk,cs_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,cc_name,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,w_warehouse_name,sm_type,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_ship_date_sk,cs_call_center_sk,cs_sold_date_sk,w_warehouse_name,sm_type] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.ship_mode [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/explain.txt new file mode 100644 index 0000000000..70584d3db8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/explain.txt @@ -0,0 +1,204 @@ +== Physical Plan == +* ColumnarToRow (39) ++- CometTakeOrderedAndProject (38) + +- CometHashAggregate (37) + +- CometExchange (36) + +- CometHashAggregate (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (29) + : +- CometBroadcastHashJoin (28) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometBroadcastHashJoin (11) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : +- CometBroadcastExchange (10) + : : : +- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : +- CometBroadcastExchange (21) + : : +- CometUnion (20) + : : :- CometProject (15) + : : : +- CometBroadcastHashJoin (14) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (12) + : : : +- ReusedExchange (13) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (16) + : : +- ReusedExchange (17) + : +- CometBroadcastExchange (27) + : +- CometProject (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- CometBroadcastExchange (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#4, ss_sold_date_sk#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6, d_year#7, d_moy#8] + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2002)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 7)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Arguments: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(13) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [customer_sk#12], [ws_bill_customer_sk#9 AS customer_sk#12] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Arguments: [cs_ship_customer_sk#13, cs_sold_date_sk#14] + +(17) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Right output [1]: [d_date_sk#15] +Arguments: [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Arguments: [customer_sk#16], [cs_ship_customer_sk#13 AS customer_sk#16] + +(20) CometUnion +Child 0 Input [1]: [customer_sk#12] +Child 1 Input [1]: [customer_sk#16] + +(21) CometBroadcastExchange +Input [1]: [customer_sk#12] +Arguments: [customer_sk#12] + +(22) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [customer_sk#12] +Arguments: [c_customer_sk#1], [customer_sk#12], LeftSemi, BuildRight + +(23) CometProject +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_current_cdemo_sk#2, c_current_addr_sk#3], [c_current_cdemo_sk#2, c_current_addr_sk#3] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#17, ca_county#18] +Arguments: [ca_address_sk#17, ca_county#18] + +(25) CometFilter +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#17)) + +(26) CometProject +Input [2]: [ca_address_sk#17, ca_county#18] +Arguments: [ca_address_sk#17], [ca_address_sk#17] + +(27) CometBroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: [ca_address_sk#17] + +(28) CometBroadcastHashJoin +Left output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ca_address_sk#17] +Arguments: [c_current_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(29) CometProject +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] +Arguments: [c_current_cdemo_sk#2], [c_current_cdemo_sk#2] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(31) CometFilter +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(32) CometBroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(33) CometBroadcastHashJoin +Left output [1]: [c_current_cdemo_sk#2] +Right output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#19], Inner, BuildRight + +(34) CometProject +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(35) CometHashAggregate +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] + +(36) CometExchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#28] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(37) CometHashAggregate +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#28] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] + +(38) CometTakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[cd_gender#20 ASC NULLS FIRST,cd_marital_status#21 ASC NULLS FIRST,cd_education_status#22 ASC NULLS FIRST,cd_purchase_estimate#23 ASC NULLS FIRST,cd_credit_rating#24 ASC NULLS FIRST,cd_dep_count#25 ASC NULLS FIRST,cd_dep_employed_count#26 ASC NULLS FIRST,cd_dep_college_count#27 ASC NULLS FIRST], output=[cd_gender#20,cd_marital_status#21,cd_education_status#22,cnt1#29,cd_purchase_estimate#23,cnt2#30,cd_credit_rating#24,cnt3#31,cd_dep_count#25,cnt4#32,cd_dep_employed_count#26,cnt5#33,cd_dep_college_count#27,cnt6#34]), [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34], 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34] + +(39) ColumnarToRow [codegen id : 1] +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..291f8cde65 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_datafusion/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cnt1,cd_purchase_estimate,cnt2,cd_credit_rating,cnt3,cd_dep_count,cnt4,cd_dep_employed_count,cnt5,cd_dep_college_count,cnt6] + CometHashAggregate [cd_gender,cd_marital_status,cd_education_status,cnt1,cd_purchase_estimate,cnt2,cd_credit_rating,cnt3,cd_dep_count,cnt4,cd_dep_employed_count,cnt5,cd_dep_college_count,cnt6,count,count(1)] + CometExchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + CometHashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] + CometProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometBroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometProject [c_current_cdemo_sk] + CometBroadcastHashJoin [c_current_cdemo_sk,c_current_addr_sk,ca_address_sk] + CometProject [c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,customer_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [customer_sk] #4 + CometUnion [customer_sk] + CometProject [ws_bill_customer_sk] [customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometProject [cs_ship_customer_sk] [customer_sk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [ca_address_sk] #5 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_county] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county] + CometBroadcastExchange [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #6 + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b9230f9e7c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/explain.txt @@ -0,0 +1,225 @@ +== Physical Plan == +* ColumnarToRow (39) ++- CometTakeOrderedAndProject (38) + +- CometHashAggregate (37) + +- CometExchange (36) + +- CometHashAggregate (35) + +- CometProject (34) + +- CometBroadcastHashJoin (33) + :- CometProject (29) + : +- CometBroadcastHashJoin (28) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometBroadcastHashJoin (11) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : +- CometBroadcastExchange (10) + : : : +- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : +- CometBroadcastExchange (21) + : : +- CometUnion (20) + : : :- CometProject (15) + : : : +- CometBroadcastHashJoin (14) + : : : :- CometScan parquet spark_catalog.default.web_sales (12) + : : : +- ReusedExchange (13) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometScan parquet spark_catalog.default.catalog_sales (16) + : : +- ReusedExchange (17) + : +- CometBroadcastExchange (27) + : +- CometProject (26) + : +- CometFilter (25) + : +- CometScan parquet spark_catalog.default.customer_address (24) + +- CometBroadcastExchange (32) + +- CometFilter (31) + +- CometScan parquet spark_catalog.default.customer_demographics (30) + + +(1) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2002)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 7)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(13) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [customer_sk#12], [ws_bill_customer_sk#9 AS customer_sk#12] + +(16) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#14)] +ReadSchema: struct + +(17) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Right output [1]: [d_date_sk#15] +Arguments: [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Arguments: [customer_sk#16], [cs_ship_customer_sk#13 AS customer_sk#16] + +(20) CometUnion +Child 0 Input [1]: [customer_sk#12] +Child 1 Input [1]: [customer_sk#16] + +(21) CometBroadcastExchange +Input [1]: [customer_sk#12] +Arguments: [customer_sk#12] + +(22) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [customer_sk#12] +Arguments: [c_customer_sk#1], [customer_sk#12], LeftSemi, BuildRight + +(23) CometProject +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_current_cdemo_sk#2, c_current_addr_sk#3], [c_current_cdemo_sk#2, c_current_addr_sk#3] + +(24) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_county#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) CometFilter +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#17)) + +(26) CometProject +Input [2]: [ca_address_sk#17, ca_county#18] +Arguments: [ca_address_sk#17], [ca_address_sk#17] + +(27) CometBroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: [ca_address_sk#17] + +(28) CometBroadcastHashJoin +Left output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ca_address_sk#17] +Arguments: [c_current_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(29) CometProject +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] +Arguments: [c_current_cdemo_sk#2], [c_current_cdemo_sk#2] + +(30) CometScan parquet spark_catalog.default.customer_demographics +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(31) CometFilter +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(32) CometBroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(33) CometBroadcastHashJoin +Left output [1]: [c_current_cdemo_sk#2] +Right output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#19], Inner, BuildRight + +(34) CometProject +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(35) CometHashAggregate +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] + +(36) CometExchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#28] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(37) CometHashAggregate +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#28] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] + +(38) CometTakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[cd_gender#20 ASC NULLS FIRST,cd_marital_status#21 ASC NULLS FIRST,cd_education_status#22 ASC NULLS FIRST,cd_purchase_estimate#23 ASC NULLS FIRST,cd_credit_rating#24 ASC NULLS FIRST,cd_dep_count#25 ASC NULLS FIRST,cd_dep_employed_count#26 ASC NULLS FIRST,cd_dep_college_count#27 ASC NULLS FIRST], output=[cd_gender#20,cd_marital_status#21,cd_education_status#22,cnt1#29,cd_purchase_estimate#23,cnt2#30,cd_credit_rating#24,cnt3#31,cd_dep_count#25,cnt4#32,cd_dep_employed_count#26,cnt5#33,cd_dep_college_count#27,cnt6#34]), [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34], 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34] + +(39) ColumnarToRow [codegen id : 1] +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#29, cd_purchase_estimate#23, cnt2#30, cd_credit_rating#24, cnt3#31, cd_dep_count#25, cnt4#32, cd_dep_employed_count#26, cnt5#33, cd_dep_college_count#27, cnt6#34] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6496c88347 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q10a.native_iceberg_compat/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cnt1,cd_purchase_estimate,cnt2,cd_credit_rating,cnt3,cd_dep_count,cnt4,cd_dep_employed_count,cnt5,cd_dep_college_count,cnt6] + CometHashAggregate [cd_gender,cd_marital_status,cd_education_status,cnt1,cd_purchase_estimate,cnt2,cd_credit_rating,cnt3,cd_dep_count,cnt4,cd_dep_employed_count,cnt5,cd_dep_college_count,cnt6,count,count(1)] + CometExchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + CometHashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] + CometProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometBroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometProject [c_current_cdemo_sk] + CometBroadcastHashJoin [c_current_cdemo_sk,c_current_addr_sk,ca_address_sk] + CometProject [c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,customer_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [customer_sk] #4 + CometUnion [customer_sk] + CometProject [ws_bill_customer_sk] [customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometProject [cs_ship_customer_sk] [customer_sk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [ca_address_sk] #5 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_county] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + CometBroadcastExchange [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #6 + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/explain.txt new file mode 100644 index 0000000000..81a4d45adf --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/explain.txt @@ -0,0 +1,326 @@ +== Physical Plan == +* ColumnarToRow (62) ++- CometTakeOrderedAndProject (61) + +- CometProject (60) + +- CometBroadcastHashJoin (59) + :- CometProject (46) + : +- CometBroadcastHashJoin (45) + : :- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (22) + : +- CometBroadcastExchange (44) + : +- CometFilter (43) + : +- CometHashAggregate (42) + : +- CometExchange (41) + : +- CometHashAggregate (40) + : +- CometProject (39) + : +- CometBroadcastHashJoin (38) + : :- CometProject (36) + : : +- CometBroadcastHashJoin (35) + : : :- CometFilter (33) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (32) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- CometBroadcastExchange (58) + +- CometHashAggregate (57) + +- CometExchange (56) + +- CometHashAggregate (55) + +- CometProject (54) + +- CometBroadcastHashJoin (53) + :- CometProject (51) + : +- CometBroadcastHashJoin (50) + : :- CometFilter (48) + : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (47) + : +- ReusedExchange (49) + +- ReusedExchange (52) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(4) CometFilter +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(9) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(11) CometBroadcastHashJoin +Left output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#13, d_year#14] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] + +(13) CometHashAggregate +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(14) CometExchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(16) CometFilter +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(17) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Arguments: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + +(18) CometFilter +Input [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_customer_id#19)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(20) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Right output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_sk#18], [ss_customer_sk#26], Inner, BuildRight + +(21) CometProject +Input [12]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(22) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#30, d_year#31] +Arguments: [d_date_sk#30, d_year#31] + +(23) CometFilter +Input [2]: [d_date_sk#30, d_year#31] +Condition : ((isnotnull(d_year#31) AND (d_year#31 = 2002)) AND isnotnull(d_date_sk#30)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#30, d_year#31] +Arguments: [d_date_sk#30, d_year#31] + +(25) CometBroadcastHashJoin +Left output [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Right output [2]: [d_date_sk#30, d_year#31] +Arguments: [ss_sold_date_sk#29], [d_date_sk#30], Inner, BuildRight + +(26) CometProject +Input [12]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29, d_date_sk#30, d_year#31] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] + +(27) CometHashAggregate +Input [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(28) CometExchange +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Arguments: hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(30) CometBroadcastExchange +Input [5]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] +Arguments: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#16, year_total#17] +Right output [5]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] +Arguments: [customer_id#16], [customer_id#33], Inner, BuildRight + +(32) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] +Arguments: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] + +(33) CometFilter +Input [8]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] +Condition : (isnotnull(c_customer_sk#38) AND isnotnull(c_customer_id#39)) + +(34) ReusedExchange [Reuses operator id: 5] +Output [4]: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] + +(35) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] +Right output [4]: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Arguments: [c_customer_sk#38], [ws_bill_customer_sk#46], Inner, BuildRight + +(36) CometProject +Input [12]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Arguments: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49], [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] + +(37) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#50, d_year#51] + +(38) CometBroadcastHashJoin +Left output [10]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Right output [2]: [d_date_sk#50, d_year#51] +Arguments: [ws_sold_date_sk#49], [d_date_sk#50], Inner, BuildRight + +(39) CometProject +Input [12]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49, d_date_sk#50, d_year#51] +Arguments: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, d_year#51], [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, d_year#51] + +(40) CometHashAggregate +Input [10]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, d_year#51] +Keys [8]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#48 - ws_ext_discount_amt#47)))] + +(41) CometExchange +Input [9]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51, sum#52] +Arguments: hashpartitioning(c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(42) CometHashAggregate +Input [9]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51, sum#52] +Keys [8]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#48 - ws_ext_discount_amt#47)))] + +(43) CometFilter +Input [2]: [customer_id#53, year_total#54] +Condition : (isnotnull(year_total#54) AND (year_total#54 > 0.00)) + +(44) CometBroadcastExchange +Input [2]: [customer_id#53, year_total#54] +Arguments: [customer_id#53, year_total#54] + +(45) CometBroadcastHashJoin +Left output [7]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] +Right output [2]: [customer_id#53, year_total#54] +Arguments: [customer_id#16], [customer_id#53], Inner, BuildRight + +(46) CometProject +Input [9]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, customer_id#53, year_total#54] +Arguments: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54], [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54] + +(47) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Arguments: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] + +(48) CometFilter +Input [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Condition : (isnotnull(c_customer_sk#55) AND isnotnull(c_customer_id#56)) + +(49) ReusedExchange [Reuses operator id: 5] +Output [4]: [ws_bill_customer_sk#63, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] + +(50) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Right output [4]: [ws_bill_customer_sk#63, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] +Arguments: [c_customer_sk#55], [ws_bill_customer_sk#63], Inner, BuildRight + +(51) CometProject +Input [12]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_bill_customer_sk#63, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] +Arguments: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66], [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] + +(52) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#67, d_year#68] + +(53) CometBroadcastHashJoin +Left output [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] +Right output [2]: [d_date_sk#67, d_year#68] +Arguments: [ws_sold_date_sk#66], [d_date_sk#67], Inner, BuildRight + +(54) CometProject +Input [12]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66, d_date_sk#67, d_year#68] +Arguments: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, d_year#68], [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, d_year#68] + +(55) CometHashAggregate +Input [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, d_year#68] +Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#65 - ws_ext_discount_amt#64)))] + +(56) CometExchange +Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68, sum#69] +Arguments: hashpartitioning(c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(57) CometHashAggregate +Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68, sum#69] +Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#65 - ws_ext_discount_amt#64)))] + +(58) CometBroadcastExchange +Input [2]: [customer_id#70, year_total#71] +Arguments: [customer_id#70, year_total#71] + +(59) CometBroadcastHashJoin +Left output [8]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54] +Right output [2]: [customer_id#70, year_total#71] +Arguments: [customer_id#16], [customer_id#70], Inner, (CASE WHEN (year_total#54 > 0.00) THEN (year_total#71 / year_total#54) ELSE 0E-20 END > CASE WHEN (year_total#17 > 0.00) THEN (year_total#37 / year_total#17) ELSE 0E-20 END), BuildRight + +(60) CometProject +Input [10]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54, customer_id#70, year_total#71] +Arguments: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36], [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] + +(61) CometTakeOrderedAndProject +Input [4]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_id#33 ASC NULLS FIRST,customer_first_name#34 ASC NULLS FIRST,customer_last_name#35 ASC NULLS FIRST,customer_email_address#36 ASC NULLS FIRST], output=[customer_id#33,customer_first_name#34,customer_last_name#35,customer_email_address#36]), [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36], 100, [customer_id#33 ASC NULLS FIRST, customer_first_name#34 ASC NULLS FIRST, customer_last_name#35 ASC NULLS FIRST, customer_email_address#36 ASC NULLS FIRST], [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] + +(62) ColumnarToRow [codegen id : 1] +Input [4]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/simplified.txt new file mode 100644 index 0000000000..42f117f1cb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_email_address] + CometProject [customer_id,customer_first_name,customer_last_name,customer_email_address] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,customer_email_address,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #2 + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #9 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #10 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #2 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..366abc6216 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/explain.txt @@ -0,0 +1,363 @@ +== Physical Plan == +* ColumnarToRow (64) ++- CometTakeOrderedAndProject (63) + +- CometProject (62) + +- CometBroadcastHashJoin (61) + :- CometProject (48) + : +- CometBroadcastHashJoin (47) + : :- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometScan parquet spark_catalog.default.customer (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometScan parquet spark_catalog.default.date_dim (22) + : +- CometBroadcastExchange (46) + : +- CometFilter (45) + : +- CometHashAggregate (44) + : +- CometExchange (43) + : +- CometHashAggregate (42) + : +- CometProject (41) + : +- CometBroadcastHashJoin (40) + : :- CometProject (38) + : : +- CometBroadcastHashJoin (37) + : : :- CometFilter (33) + : : : +- CometScan parquet spark_catalog.default.customer (32) + : : +- CometBroadcastExchange (36) + : : +- CometFilter (35) + : : +- CometScan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (39) + +- CometBroadcastExchange (60) + +- CometHashAggregate (59) + +- CometExchange (58) + +- CometHashAggregate (57) + +- CometProject (56) + +- CometBroadcastHashJoin (55) + :- CometProject (53) + : +- CometBroadcastHashJoin (52) + : :- CometFilter (50) + : : +- CometScan parquet spark_catalog.default.customer (49) + : +- ReusedExchange (51) + +- ReusedExchange (54) + + +(1) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(5) CometBroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Right output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_sk#1], [ss_customer_sk#9], Inner, BuildRight + +(7) CometProject +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(11) CometBroadcastHashJoin +Left output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#13, d_year#14] +Arguments: [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(12) CometProject +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14], [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] + +(13) CometHashAggregate +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(14) CometExchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] + +(16) CometFilter +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(17) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(18) CometFilter +Input [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_customer_id#19)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(20) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Right output [4]: [ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_sk#18], [ss_customer_sk#26], Inner, BuildRight + +(21) CometProject +Input [12]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_customer_sk#26, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] + +(22) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#30, d_year#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) CometFilter +Input [2]: [d_date_sk#30, d_year#31] +Condition : ((isnotnull(d_year#31) AND (d_year#31 = 2002)) AND isnotnull(d_date_sk#30)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#30, d_year#31] +Arguments: [d_date_sk#30, d_year#31] + +(25) CometBroadcastHashJoin +Left output [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29] +Right output [2]: [d_date_sk#30, d_year#31] +Arguments: [ss_sold_date_sk#29], [d_date_sk#30], Inner, BuildRight + +(26) CometProject +Input [12]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, ss_sold_date_sk#29, d_date_sk#30, d_year#31] +Arguments: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31], [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] + +(27) CometHashAggregate +Input [10]: [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#27, ss_ext_list_price#28, d_year#31] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(28) CometExchange +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Arguments: hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [9]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, sum#32] +Keys [8]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#31, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#28 - ss_ext_discount_amt#27)))] + +(30) CometBroadcastExchange +Input [5]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] +Arguments: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#16, year_total#17] +Right output [5]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] +Arguments: [customer_id#16], [customer_id#33], Inner, BuildRight + +(32) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(33) CometFilter +Input [8]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] +Condition : (isnotnull(c_customer_sk#38) AND isnotnull(c_customer_id#39)) + +(34) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#49)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) CometFilter +Input [4]: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Condition : isnotnull(ws_bill_customer_sk#46) + +(36) CometBroadcastExchange +Input [4]: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Arguments: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] + +(37) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45] +Right output [4]: [ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Arguments: [c_customer_sk#38], [ws_bill_customer_sk#46], Inner, BuildRight + +(38) CometProject +Input [12]: [c_customer_sk#38, c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_bill_customer_sk#46, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Arguments: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49], [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] + +(39) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#50, d_year#51] + +(40) CometBroadcastHashJoin +Left output [10]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49] +Right output [2]: [d_date_sk#50, d_year#51] +Arguments: [ws_sold_date_sk#49], [d_date_sk#50], Inner, BuildRight + +(41) CometProject +Input [12]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, ws_sold_date_sk#49, d_date_sk#50, d_year#51] +Arguments: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, d_year#51], [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, d_year#51] + +(42) CometHashAggregate +Input [10]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, ws_ext_discount_amt#47, ws_ext_list_price#48, d_year#51] +Keys [8]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#48 - ws_ext_discount_amt#47)))] + +(43) CometExchange +Input [9]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51, sum#52] +Arguments: hashpartitioning(c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [9]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51, sum#52] +Keys [8]: [c_customer_id#39, c_first_name#40, c_last_name#41, c_preferred_cust_flag#42, c_birth_country#43, c_login#44, c_email_address#45, d_year#51] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#48 - ws_ext_discount_amt#47)))] + +(45) CometFilter +Input [2]: [customer_id#53, year_total#54] +Condition : (isnotnull(year_total#54) AND (year_total#54 > 0.00)) + +(46) CometBroadcastExchange +Input [2]: [customer_id#53, year_total#54] +Arguments: [customer_id#53, year_total#54] + +(47) CometBroadcastHashJoin +Left output [7]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37] +Right output [2]: [customer_id#53, year_total#54] +Arguments: [customer_id#16], [customer_id#53], Inner, BuildRight + +(48) CometProject +Input [9]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, customer_id#53, year_total#54] +Arguments: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54], [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54] + +(49) CometScan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(50) CometFilter +Input [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Condition : (isnotnull(c_customer_sk#55) AND isnotnull(c_customer_id#56)) + +(51) ReusedExchange [Reuses operator id: 36] +Output [4]: [ws_bill_customer_sk#63, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] + +(52) CometBroadcastHashJoin +Left output [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Right output [4]: [ws_bill_customer_sk#63, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] +Arguments: [c_customer_sk#55], [ws_bill_customer_sk#63], Inner, BuildRight + +(53) CometProject +Input [12]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_bill_customer_sk#63, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] +Arguments: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66], [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] + +(54) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#67, d_year#68] + +(55) CometBroadcastHashJoin +Left output [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66] +Right output [2]: [d_date_sk#67, d_year#68] +Arguments: [ws_sold_date_sk#66], [d_date_sk#67], Inner, BuildRight + +(56) CometProject +Input [12]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, ws_sold_date_sk#66, d_date_sk#67, d_year#68] +Arguments: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, d_year#68], [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, d_year#68] + +(57) CometHashAggregate +Input [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#64, ws_ext_list_price#65, d_year#68] +Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#65 - ws_ext_discount_amt#64)))] + +(58) CometExchange +Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68, sum#69] +Arguments: hashpartitioning(c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68, sum#69] +Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#68] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#65 - ws_ext_discount_amt#64)))] + +(60) CometBroadcastExchange +Input [2]: [customer_id#70, year_total#71] +Arguments: [customer_id#70, year_total#71] + +(61) CometBroadcastHashJoin +Left output [8]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54] +Right output [2]: [customer_id#70, year_total#71] +Arguments: [customer_id#16], [customer_id#70], Inner, (CASE WHEN (year_total#54 > 0.00) THEN (year_total#71 / year_total#54) ELSE 0E-20 END > CASE WHEN (year_total#17 > 0.00) THEN (year_total#37 / year_total#17) ELSE 0E-20 END), BuildRight + +(62) CometProject +Input [10]: [customer_id#16, year_total#17, customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36, year_total#37, year_total#54, customer_id#70, year_total#71] +Arguments: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36], [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] + +(63) CometTakeOrderedAndProject +Input [4]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_id#33 ASC NULLS FIRST,customer_first_name#34 ASC NULLS FIRST,customer_last_name#35 ASC NULLS FIRST,customer_email_address#36 ASC NULLS FIRST], output=[customer_id#33,customer_first_name#34,customer_last_name#35,customer_email_address#36]), [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36], 100, [customer_id#33 ASC NULLS FIRST, customer_first_name#34 ASC NULLS FIRST, customer_last_name#35 ASC NULLS FIRST, customer_email_address#36 ASC NULLS FIRST], [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] + +(64) ColumnarToRow [codegen id : 1] +Input [4]: [customer_id#33, customer_first_name#34, customer_last_name#35, customer_email_address#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4398ff4014 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q11.native_iceberg_compat/simplified.txt @@ -0,0 +1,66 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_email_address] + CometProject [customer_id,customer_first_name,customer_last_name,customer_email_address] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,customer_email_address,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum,ss_ext_list_price,ss_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometBroadcastExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + CometFilter [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #10 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt)))] + CometExchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,ws_ext_list_price,ws_ext_discount_amt] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/explain.txt new file mode 100644 index 0000000000..b5331eee1f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/explain.txt @@ -0,0 +1,116 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ws_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] + +(20) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a438deae67 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0] + CometExchange [i_class] #1 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4309133cf0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.web_sales (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.item (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometScan parquet spark_catalog.default.date_dim (8) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ws_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] + +(20) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ed8ba16b01 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q12.native_iceberg_compat/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0] + CometExchange [i_class] #1 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,sum,sum(UnscaledValue(ws_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ws_ext_sales_price] + CometProject [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/explain.txt new file mode 100644 index 0000000000..d424b863f8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/explain.txt @@ -0,0 +1,542 @@ +== Physical Plan == +* ColumnarToRow (77) ++- CometTakeOrderedAndProject (76) + +- CometBroadcastHashJoin (75) + :- CometFilter (56) + : +- CometHashAggregate (55) + : +- CometExchange (54) + : +- CometHashAggregate (53) + : +- CometProject (52) + : +- CometBroadcastHashJoin (51) + : :- CometProject (46) + : : +- CometBroadcastHashJoin (45) + : : :- CometBroadcastHashJoin (39) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (38) + : : : +- CometProject (37) + : : : +- CometBroadcastHashJoin (36) + : : : :- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (16) + : : : : +- ReusedExchange (27) + : : : +- ReusedExchange (33) + : : +- CometBroadcastExchange (44) + : : +- CometBroadcastHashJoin (43) + : : :- CometFilter (41) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (40) + : : +- ReusedExchange (42) + : +- CometBroadcastExchange (50) + : +- CometProject (49) + : +- CometFilter (48) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (47) + +- CometBroadcastExchange (74) + +- CometFilter (73) + +- CometHashAggregate (72) + +- CometExchange (71) + +- CometHashAggregate (70) + +- CometProject (69) + +- CometBroadcastHashJoin (68) + :- CometProject (63) + : +- CometBroadcastHashJoin (62) + : :- CometBroadcastHashJoin (60) + : : :- CometFilter (58) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (57) + : : +- ReusedExchange (59) + : +- ReusedExchange (61) + +- CometBroadcastExchange (67) + +- CometProject (66) + +- CometFilter (65) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (64) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Arguments: [ss_item_sk#9, ss_sold_date_sk#10] + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Arguments: [cs_item_sk#15, cs_sold_date_sk#16] + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21, d_year#22] + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1998)) AND (d_year#22 <= 2000)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) ReusedExchange [Reuses operator id: 22] +Output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] + +(34) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#27, 0), isnull(i_brand_id#27), coalesce(i_class_id#28, 0), isnull(i_class_id#28), coalesce(i_category_id#29, 0), isnull(i_category_id#29)], LeftSemi, BuildRight + +(35) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(36) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(37) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#30], [i_item_sk#5 AS ss_item_sk#30] + +(38) CometBroadcastExchange +Input [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#1], [ss_item_sk#30], LeftSemi, BuildRight + +(40) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(41) CometFilter +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Condition : (((isnotnull(i_item_sk#31) AND isnotnull(i_brand_id#32)) AND isnotnull(i_class_id#33)) AND isnotnull(i_category_id#34)) + +(42) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#30] + +(43) CometBroadcastHashJoin +Left output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [ss_item_sk#30] +Arguments: [i_item_sk#31], [ss_item_sk#30], LeftSemi, BuildRight + +(44) CometBroadcastExchange +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_item_sk#1], [i_item_sk#31], Inner, BuildRight + +(46) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] + +(47) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#35, d_week_seq#36] +Arguments: [d_date_sk#35, d_week_seq#36] + +(48) CometFilter +Input [2]: [d_date_sk#35, d_week_seq#36] +Condition : ((isnotnull(d_week_seq#36) AND (d_week_seq#36 = Subquery scalar-subquery#37, [id=#38])) AND isnotnull(d_date_sk#35)) + +(49) CometProject +Input [2]: [d_date_sk#35, d_week_seq#36] +Arguments: [d_date_sk#35], [d_date_sk#35] + +(50) CometBroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: [d_date_sk#35] + +(51) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [d_date_sk#35] +Arguments: [ss_sold_date_sk#4], [d_date_sk#35], Inner, BuildRight + +(52) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34, d_date_sk#35] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] + +(53) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(54) CometExchange +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#39, isEmpty#40, count#41] +Arguments: hashpartitioning(i_brand_id#32, i_class_id#33, i_category_id#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometHashAggregate +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#39, isEmpty#40, count#41] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(56) CometFilter +Input [6]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44] +Condition : (isnotnull(sales#43) AND (cast(sales#43 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(57) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Arguments: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] + +(58) CometFilter +Input [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Condition : isnotnull(ss_item_sk#47) + +(59) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#51] + +(60) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Right output [1]: [ss_item_sk#51] +Arguments: [ss_item_sk#47], [ss_item_sk#51], LeftSemi, BuildRight + +(61) ReusedExchange [Reuses operator id: 44] +Output [4]: [i_item_sk#52, i_brand_id#53, i_class_id#54, i_category_id#55] + +(62) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50] +Right output [4]: [i_item_sk#52, i_brand_id#53, i_class_id#54, i_category_id#55] +Arguments: [ss_item_sk#47], [i_item_sk#52], Inner, BuildRight + +(63) CometProject +Input [8]: [ss_item_sk#47, ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_item_sk#52, i_brand_id#53, i_class_id#54, i_category_id#55] +Arguments: [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55], [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55] + +(64) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#56, d_week_seq#57] +Arguments: [d_date_sk#56, d_week_seq#57] + +(65) CometFilter +Input [2]: [d_date_sk#56, d_week_seq#57] +Condition : ((isnotnull(d_week_seq#57) AND (d_week_seq#57 = Subquery scalar-subquery#58, [id=#59])) AND isnotnull(d_date_sk#56)) + +(66) CometProject +Input [2]: [d_date_sk#56, d_week_seq#57] +Arguments: [d_date_sk#56], [d_date_sk#56] + +(67) CometBroadcastExchange +Input [1]: [d_date_sk#56] +Arguments: [d_date_sk#56] + +(68) CometBroadcastHashJoin +Left output [6]: [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55] +Right output [1]: [d_date_sk#56] +Arguments: [ss_sold_date_sk#50], [d_date_sk#56], Inner, BuildRight + +(69) CometProject +Input [7]: [ss_quantity#48, ss_list_price#49, ss_sold_date_sk#50, i_brand_id#53, i_class_id#54, i_category_id#55, d_date_sk#56] +Arguments: [ss_quantity#48, ss_list_price#49, i_brand_id#53, i_class_id#54, i_category_id#55], [ss_quantity#48, ss_list_price#49, i_brand_id#53, i_class_id#54, i_category_id#55] + +(70) CometHashAggregate +Input [5]: [ss_quantity#48, ss_list_price#49, i_brand_id#53, i_class_id#54, i_category_id#55] +Keys [3]: [i_brand_id#53, i_class_id#54, i_category_id#55] +Functions [2]: [partial_sum((cast(ss_quantity#48 as decimal(10,0)) * ss_list_price#49)), partial_count(1)] + +(71) CometExchange +Input [6]: [i_brand_id#53, i_class_id#54, i_category_id#55, sum#60, isEmpty#61, count#62] +Arguments: hashpartitioning(i_brand_id#53, i_class_id#54, i_category_id#55, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(72) CometHashAggregate +Input [6]: [i_brand_id#53, i_class_id#54, i_category_id#55, sum#60, isEmpty#61, count#62] +Keys [3]: [i_brand_id#53, i_class_id#54, i_category_id#55] +Functions [2]: [sum((cast(ss_quantity#48 as decimal(10,0)) * ss_list_price#49)), count(1)] + +(73) CometFilter +Input [6]: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) + +(74) CometBroadcastExchange +Input [6]: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Arguments: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] + +(75) CometBroadcastHashJoin +Left output [6]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44] +Right output [6]: [channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Arguments: [i_brand_id#32, i_class_id#33, i_category_id#34], [i_brand_id#53, i_class_id#54, i_category_id#55], Inner, BuildRight + +(76) CometTakeOrderedAndProject +Input [12]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_brand_id#32 ASC NULLS FIRST,i_class_id#33 ASC NULLS FIRST,i_category_id#34 ASC NULLS FIRST], output=[channel#42,i_brand_id#32,i_class_id#33,i_category_id#34,sales#43,number_sales#44,channel#63,i_brand_id#53,i_class_id#54,i_category_id#55,sales#64,number_sales#65]), [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65], 100, [i_brand_id#32 ASC NULLS FIRST, i_class_id#33 ASC NULLS FIRST, i_category_id#34 ASC NULLS FIRST], [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] + +(77) ColumnarToRow [codegen id : 1] +Input [12]: [channel#42, i_brand_id#32, i_class_id#33, i_category_id#34, sales#43, number_sales#44, channel#63, i_brand_id#53, i_class_id#54, i_category_id#55, sales#64, number_sales#65] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 56 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +* ColumnarToRow (94) ++- CometHashAggregate (93) + +- CometExchange (92) + +- CometHashAggregate (91) + +- CometUnion (90) + :- CometProject (81) + : +- CometBroadcastHashJoin (80) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (78) + : +- ReusedExchange (79) + :- CometProject (85) + : +- CometBroadcastHashJoin (84) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (82) + : +- ReusedExchange (83) + +- CometProject (89) + +- CometBroadcastHashJoin (88) + :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (86) + +- ReusedExchange (87) + + +(78) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68] +Arguments: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68] + +(79) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#69] + +(80) CometBroadcastHashJoin +Left output [3]: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68] +Right output [1]: [d_date_sk#69] +Arguments: [ss_sold_date_sk#68], [d_date_sk#69], Inner, BuildRight + +(81) CometProject +Input [4]: [ss_quantity#66, ss_list_price#67, ss_sold_date_sk#68, d_date_sk#69] +Arguments: [quantity#70, list_price#71], [ss_quantity#66 AS quantity#70, ss_list_price#67 AS list_price#71] + +(82) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74] +Arguments: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74] + +(83) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#75] + +(84) CometBroadcastHashJoin +Left output [3]: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74] +Right output [1]: [d_date_sk#75] +Arguments: [cs_sold_date_sk#74], [d_date_sk#75], Inner, BuildRight + +(85) CometProject +Input [4]: [cs_quantity#72, cs_list_price#73, cs_sold_date_sk#74, d_date_sk#75] +Arguments: [quantity#76, list_price#77], [cs_quantity#72 AS quantity#76, cs_list_price#73 AS list_price#77] + +(86) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Arguments: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] + +(87) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#81] + +(88) CometBroadcastHashJoin +Left output [3]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Right output [1]: [d_date_sk#81] +Arguments: [ws_sold_date_sk#80], [d_date_sk#81], Inner, BuildRight + +(89) CometProject +Input [4]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80, d_date_sk#81] +Arguments: [quantity#82, list_price#83], [ws_quantity#78 AS quantity#82, ws_list_price#79 AS list_price#83] + +(90) CometUnion +Child 0 Input [2]: [quantity#70, list_price#71] +Child 1 Input [2]: [quantity#76, list_price#77] +Child 2 Input [2]: [quantity#82, list_price#83] + +(91) CometHashAggregate +Input [2]: [quantity#70, list_price#71] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#70 as decimal(10,0)) * list_price#71))] + +(92) CometExchange +Input [2]: [sum#84, count#85] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(93) CometHashAggregate +Input [2]: [sum#84, count#85] +Keys: [] +Functions [1]: [avg((cast(quantity#70 as decimal(10,0)) * list_price#71))] + +(94) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#86] + +Subquery:2 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +* ColumnarToRow (98) ++- CometProject (97) + +- CometFilter (96) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (95) + + +(95) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] +Arguments: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] + +(96) CometFilter +Input [4]: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] +Condition : (((((isnotnull(d_year#88) AND isnotnull(d_moy#89)) AND isnotnull(d_dom#90)) AND (d_year#88 = 1999)) AND (d_moy#89 = 12)) AND (d_dom#90 = 16)) + +(97) CometProject +Input [4]: [d_week_seq#87, d_year#88, d_moy#89, d_dom#90] +Arguments: [d_week_seq#87], [d_week_seq#87] + +(98) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#87] + +Subquery:3 Hosting operator id = 73 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] + +Subquery:4 Hosting operator id = 65 Hosting Expression = Subquery scalar-subquery#58, [id=#59] +* ColumnarToRow (102) ++- CometProject (101) + +- CometFilter (100) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (99) + + +(99) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Arguments: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] + +(100) CometFilter +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Condition : (((((isnotnull(d_year#92) AND isnotnull(d_moy#93)) AND isnotnull(d_dom#94)) AND (d_year#92 = 1998)) AND (d_moy#93 = 12)) AND (d_dom#94 = 16)) + +(101) CometProject +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Arguments: [d_week_seq#91], [d_week_seq#91] + +(102) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#91] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/simplified.txt new file mode 100644 index 0000000000..d5143df8c8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_datafusion/simplified.txt @@ -0,0 +1,114 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometBroadcastHashJoin [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #11 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #2 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #3 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #4 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #5 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #6 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #8 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk] #8 + ReusedExchange [i_brand_id,i_class_id,i_category_id] #6 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #2 + CometBroadcastExchange [d_date_sk] #10 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] + CometBroadcastExchange [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] #12 + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #13 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [ss_item_sk] #2 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + CometBroadcastExchange [d_date_sk] #14 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_week_seq,d_year,d_moy,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..04787290c6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/explain.txt @@ -0,0 +1,639 @@ +== Physical Plan == +* ColumnarToRow (85) ++- CometTakeOrderedAndProject (84) + +- CometBroadcastHashJoin (83) + :- CometFilter (64) + : +- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (54) + : : +- CometBroadcastHashJoin (53) + : : :- CometBroadcastHashJoin (47) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (46) + : : : +- CometProject (45) + : : : +- CometBroadcastHashJoin (44) + : : : :- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.item (3) + : : : +- CometBroadcastExchange (43) + : : : +- CometBroadcastHashJoin (42) + : : : :- CometHashAggregate (32) + : : : : +- CometExchange (31) + : : : : +- CometHashAggregate (30) + : : : : +- CometProject (29) + : : : : +- CometBroadcastHashJoin (28) + : : : : :- CometProject (26) + : : : : : +- CometBroadcastHashJoin (25) + : : : : : :- CometFilter (6) + : : : : : : +- CometScan parquet spark_catalog.default.store_sales (5) + : : : : : +- CometBroadcastExchange (24) + : : : : : +- CometBroadcastHashJoin (23) + : : : : : :- CometFilter (8) + : : : : : : +- CometScan parquet spark_catalog.default.item (7) + : : : : : +- CometBroadcastExchange (22) + : : : : : +- CometProject (21) + : : : : : +- CometBroadcastHashJoin (20) + : : : : : :- CometProject (15) + : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : :- CometFilter (10) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (9) + : : : : : : +- CometBroadcastExchange (13) + : : : : : : +- CometFilter (12) + : : : : : : +- CometScan parquet spark_catalog.default.item (11) + : : : : : +- CometBroadcastExchange (19) + : : : : : +- CometProject (18) + : : : : : +- CometFilter (17) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (16) + : : : : +- ReusedExchange (27) + : : : +- CometBroadcastExchange (41) + : : : +- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (37) + : : : : +- CometBroadcastHashJoin (36) + : : : : :- CometFilter (34) + : : : : : +- CometScan parquet spark_catalog.default.web_sales (33) + : : : : +- ReusedExchange (35) + : : : +- ReusedExchange (38) + : : +- CometBroadcastExchange (52) + : : +- CometBroadcastHashJoin (51) + : : :- CometFilter (49) + : : : +- CometScan parquet spark_catalog.default.item (48) + : : +- ReusedExchange (50) + : +- CometBroadcastExchange (58) + : +- CometProject (57) + : +- CometFilter (56) + : +- CometScan parquet spark_catalog.default.date_dim (55) + +- CometBroadcastExchange (82) + +- CometFilter (81) + +- CometHashAggregate (80) + +- CometExchange (79) + +- CometHashAggregate (78) + +- CometProject (77) + +- CometBroadcastHashJoin (76) + :- CometProject (71) + : +- CometBroadcastHashJoin (70) + : :- CometBroadcastHashJoin (68) + : : :- CometFilter (66) + : : : +- CometScan parquet spark_catalog.default.store_sales (65) + : : +- ReusedExchange (67) + : +- ReusedExchange (69) + +- CometBroadcastExchange (75) + +- CometProject (74) + +- CometFilter (73) + +- CometScan parquet spark_catalog.default.date_dim (72) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1998)) AND (d_year#22 <= 2000)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(35) ReusedExchange [Reuses operator id: 13] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Right output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_item_sk#27], [i_item_sk#29], Inner, BuildRight + +(37) CometProject +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32], [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#33] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Right output [1]: [d_date_sk#33] +Arguments: [ws_sold_date_sk#28], [d_date_sk#33], Inner, BuildRight + +(40) CometProject +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32], [i_brand_id#30, i_class_id#31, i_category_id#32] + +(41) CometBroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32] + +(42) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)], LeftSemi, BuildRight + +(43) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(44) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(45) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#34], [i_item_sk#5 AS ss_item_sk#34] + +(46) CometBroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#34] + +(47) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#1], [ss_item_sk#34], LeftSemi, BuildRight + +(48) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(49) CometFilter +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : (((isnotnull(i_item_sk#35) AND isnotnull(i_brand_id#36)) AND isnotnull(i_class_id#37)) AND isnotnull(i_category_id#38)) + +(50) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#34] + +(51) CometBroadcastHashJoin +Left output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [ss_item_sk#34] +Arguments: [i_item_sk#35], [ss_item_sk#34], LeftSemi, BuildRight + +(52) CometBroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(53) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_item_sk#1], [i_item_sk#35], Inner, BuildRight + +(54) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] + +(55) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#39, d_week_seq#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(56) CometFilter +Input [2]: [d_date_sk#39, d_week_seq#40] +Condition : ((isnotnull(d_week_seq#40) AND (d_week_seq#40 = Subquery scalar-subquery#41, [id=#42])) AND isnotnull(d_date_sk#39)) + +(57) CometProject +Input [2]: [d_date_sk#39, d_week_seq#40] +Arguments: [d_date_sk#39], [d_date_sk#39] + +(58) CometBroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: [d_date_sk#39] + +(59) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [d_date_sk#39] +Arguments: [ss_sold_date_sk#4], [d_date_sk#39], Inner, BuildRight + +(60) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] + +(61) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(62) CometExchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#43, isEmpty#44, count#45] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(63) CometHashAggregate +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#43, isEmpty#44, count#45] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(64) CometFilter +Input [6]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48] +Condition : (isnotnull(sales#47) AND (cast(sales#47 as decimal(32,6)) > cast(Subquery scalar-subquery#49, [id=#50] as decimal(32,6)))) + +(65) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#54)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(66) CometFilter +Input [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Condition : isnotnull(ss_item_sk#51) + +(67) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#55] + +(68) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Right output [1]: [ss_item_sk#55] +Arguments: [ss_item_sk#51], [ss_item_sk#55], LeftSemi, BuildRight + +(69) ReusedExchange [Reuses operator id: 52] +Output [4]: [i_item_sk#56, i_brand_id#57, i_class_id#58, i_category_id#59] + +(70) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54] +Right output [4]: [i_item_sk#56, i_brand_id#57, i_class_id#58, i_category_id#59] +Arguments: [ss_item_sk#51], [i_item_sk#56], Inner, BuildRight + +(71) CometProject +Input [8]: [ss_item_sk#51, ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_item_sk#56, i_brand_id#57, i_class_id#58, i_category_id#59] +Arguments: [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59], [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59] + +(72) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#60, d_week_seq#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(73) CometFilter +Input [2]: [d_date_sk#60, d_week_seq#61] +Condition : ((isnotnull(d_week_seq#61) AND (d_week_seq#61 = Subquery scalar-subquery#62, [id=#63])) AND isnotnull(d_date_sk#60)) + +(74) CometProject +Input [2]: [d_date_sk#60, d_week_seq#61] +Arguments: [d_date_sk#60], [d_date_sk#60] + +(75) CometBroadcastExchange +Input [1]: [d_date_sk#60] +Arguments: [d_date_sk#60] + +(76) CometBroadcastHashJoin +Left output [6]: [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59] +Right output [1]: [d_date_sk#60] +Arguments: [ss_sold_date_sk#54], [d_date_sk#60], Inner, BuildRight + +(77) CometProject +Input [7]: [ss_quantity#52, ss_list_price#53, ss_sold_date_sk#54, i_brand_id#57, i_class_id#58, i_category_id#59, d_date_sk#60] +Arguments: [ss_quantity#52, ss_list_price#53, i_brand_id#57, i_class_id#58, i_category_id#59], [ss_quantity#52, ss_list_price#53, i_brand_id#57, i_class_id#58, i_category_id#59] + +(78) CometHashAggregate +Input [5]: [ss_quantity#52, ss_list_price#53, i_brand_id#57, i_class_id#58, i_category_id#59] +Keys [3]: [i_brand_id#57, i_class_id#58, i_category_id#59] +Functions [2]: [partial_sum((cast(ss_quantity#52 as decimal(10,0)) * ss_list_price#53)), partial_count(1)] + +(79) CometExchange +Input [6]: [i_brand_id#57, i_class_id#58, i_category_id#59, sum#64, isEmpty#65, count#66] +Arguments: hashpartitioning(i_brand_id#57, i_class_id#58, i_category_id#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(80) CometHashAggregate +Input [6]: [i_brand_id#57, i_class_id#58, i_category_id#59, sum#64, isEmpty#65, count#66] +Keys [3]: [i_brand_id#57, i_class_id#58, i_category_id#59] +Functions [2]: [sum((cast(ss_quantity#52 as decimal(10,0)) * ss_list_price#53)), count(1)] + +(81) CometFilter +Input [6]: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Condition : (isnotnull(sales#68) AND (cast(sales#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#49, [id=#50] as decimal(32,6)))) + +(82) CometBroadcastExchange +Input [6]: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Arguments: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] + +(83) CometBroadcastHashJoin +Left output [6]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48] +Right output [6]: [channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Arguments: [i_brand_id#36, i_class_id#37, i_category_id#38], [i_brand_id#57, i_class_id#58, i_category_id#59], Inner, BuildRight + +(84) CometTakeOrderedAndProject +Input [12]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_brand_id#36 ASC NULLS FIRST,i_class_id#37 ASC NULLS FIRST,i_category_id#38 ASC NULLS FIRST], output=[channel#46,i_brand_id#36,i_class_id#37,i_category_id#38,sales#47,number_sales#48,channel#67,i_brand_id#57,i_class_id#58,i_category_id#59,sales#68,number_sales#69]), [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69], 100, [i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] + +(85) ColumnarToRow [codegen id : 1] +Input [12]: [channel#46, i_brand_id#36, i_class_id#37, i_category_id#38, sales#47, number_sales#48, channel#67, i_brand_id#57, i_class_id#58, i_category_id#59, sales#68, number_sales#69] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#49, [id=#50] +* ColumnarToRow (102) ++- CometHashAggregate (101) + +- CometExchange (100) + +- CometHashAggregate (99) + +- CometUnion (98) + :- CometProject (89) + : +- CometBroadcastHashJoin (88) + : :- CometScan parquet spark_catalog.default.store_sales (86) + : +- ReusedExchange (87) + :- CometProject (93) + : +- CometBroadcastHashJoin (92) + : :- CometScan parquet spark_catalog.default.catalog_sales (90) + : +- ReusedExchange (91) + +- CometProject (97) + +- CometBroadcastHashJoin (96) + :- CometScan parquet spark_catalog.default.web_sales (94) + +- ReusedExchange (95) + + +(86) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#70, ss_list_price#71, ss_sold_date_sk#72] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#72)] +ReadSchema: struct + +(87) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#73] + +(88) CometBroadcastHashJoin +Left output [3]: [ss_quantity#70, ss_list_price#71, ss_sold_date_sk#72] +Right output [1]: [d_date_sk#73] +Arguments: [ss_sold_date_sk#72], [d_date_sk#73], Inner, BuildRight + +(89) CometProject +Input [4]: [ss_quantity#70, ss_list_price#71, ss_sold_date_sk#72, d_date_sk#73] +Arguments: [quantity#74, list_price#75], [ss_quantity#70 AS quantity#74, ss_list_price#71 AS list_price#75] + +(90) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#76, cs_list_price#77, cs_sold_date_sk#78] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#78)] +ReadSchema: struct + +(91) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#79] + +(92) CometBroadcastHashJoin +Left output [3]: [cs_quantity#76, cs_list_price#77, cs_sold_date_sk#78] +Right output [1]: [d_date_sk#79] +Arguments: [cs_sold_date_sk#78], [d_date_sk#79], Inner, BuildRight + +(93) CometProject +Input [4]: [cs_quantity#76, cs_list_price#77, cs_sold_date_sk#78, d_date_sk#79] +Arguments: [quantity#80, list_price#81], [cs_quantity#76 AS quantity#80, cs_list_price#77 AS list_price#81] + +(94) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#82, ws_list_price#83, ws_sold_date_sk#84] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#84)] +ReadSchema: struct + +(95) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#85] + +(96) CometBroadcastHashJoin +Left output [3]: [ws_quantity#82, ws_list_price#83, ws_sold_date_sk#84] +Right output [1]: [d_date_sk#85] +Arguments: [ws_sold_date_sk#84], [d_date_sk#85], Inner, BuildRight + +(97) CometProject +Input [4]: [ws_quantity#82, ws_list_price#83, ws_sold_date_sk#84, d_date_sk#85] +Arguments: [quantity#86, list_price#87], [ws_quantity#82 AS quantity#86, ws_list_price#83 AS list_price#87] + +(98) CometUnion +Child 0 Input [2]: [quantity#74, list_price#75] +Child 1 Input [2]: [quantity#80, list_price#81] +Child 2 Input [2]: [quantity#86, list_price#87] + +(99) CometHashAggregate +Input [2]: [quantity#74, list_price#75] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#74 as decimal(10,0)) * list_price#75))] + +(100) CometExchange +Input [2]: [sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(101) CometHashAggregate +Input [2]: [sum#88, count#89] +Keys: [] +Functions [1]: [avg((cast(quantity#74 as decimal(10,0)) * list_price#75))] + +(102) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#90] + +Subquery:2 Hosting operator id = 56 Hosting Expression = Subquery scalar-subquery#41, [id=#42] +* ColumnarToRow (106) ++- CometProject (105) + +- CometFilter (104) + +- CometScan parquet spark_catalog.default.date_dim (103) + + +(103) CometScan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(104) CometFilter +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Condition : (((((isnotnull(d_year#92) AND isnotnull(d_moy#93)) AND isnotnull(d_dom#94)) AND (d_year#92 = 1999)) AND (d_moy#93 = 12)) AND (d_dom#94 = 16)) + +(105) CometProject +Input [4]: [d_week_seq#91, d_year#92, d_moy#93, d_dom#94] +Arguments: [d_week_seq#91], [d_week_seq#91] + +(106) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#91] + +Subquery:3 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#49, [id=#50] + +Subquery:4 Hosting operator id = 73 Hosting Expression = Subquery scalar-subquery#62, [id=#63] +* ColumnarToRow (110) ++- CometProject (109) + +- CometFilter (108) + +- CometScan parquet spark_catalog.default.date_dim (107) + + +(107) CometScan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#95, d_year#96, d_moy#97, d_dom#98] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(108) CometFilter +Input [4]: [d_week_seq#95, d_year#96, d_moy#97, d_dom#98] +Condition : (((((isnotnull(d_year#96) AND isnotnull(d_moy#97)) AND isnotnull(d_dom#98)) AND (d_year#96 = 1998)) AND (d_moy#97 = 12)) AND (d_dom#98 = 16)) + +(109) CometProject +Input [4]: [d_week_seq#95, d_year#96, d_moy#97, d_dom#98] +Arguments: [d_week_seq#95], [d_week_seq#95] + +(110) ColumnarToRow [codegen id : 1] +Input [1]: [d_week_seq#95] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8511334b1a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14.native_iceberg_compat/simplified.txt @@ -0,0 +1,122 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometBroadcastHashJoin [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #2 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #12 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #8 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #1 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #2 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #3 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #4 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #5 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #6 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #8 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [d_date_sk] #8 + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #9 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ws_item_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + ReusedExchange [d_date_sk] #8 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #2 + CometBroadcastExchange [d_date_sk] #11 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + CometBroadcastExchange [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] #13 + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #14 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [ss_item_sk] #2 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + CometBroadcastExchange [d_date_sk] #15 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [d_week_seq] + CometFilter [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/explain.txt new file mode 100644 index 0000000000..a0aa70424a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/explain.txt @@ -0,0 +1,603 @@ +== Physical Plan == +* ColumnarToRow (92) ++- CometTakeOrderedAndProject (91) + +- CometHashAggregate (90) + +- CometExchange (89) + +- CometHashAggregate (88) + +- CometUnion (87) + :- CometHashAggregate (66) + : +- CometExchange (65) + : +- CometHashAggregate (64) + : +- CometUnion (63) + : :- CometFilter (56) + : : +- CometHashAggregate (55) + : : +- CometExchange (54) + : : +- CometHashAggregate (53) + : : +- CometProject (52) + : : +- CometBroadcastHashJoin (51) + : : :- CometProject (46) + : : : +- CometBroadcastHashJoin (45) + : : : :- CometBroadcastHashJoin (39) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometBroadcastExchange (38) + : : : : +- CometProject (37) + : : : : +- CometBroadcastHashJoin (36) + : : : : :- CometFilter (4) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : : +- CometBroadcastExchange (35) + : : : : +- CometBroadcastHashJoin (34) + : : : : :- CometHashAggregate (32) + : : : : : +- CometExchange (31) + : : : : : +- CometHashAggregate (30) + : : : : : +- CometProject (29) + : : : : : +- CometBroadcastHashJoin (28) + : : : : : :- CometProject (26) + : : : : : : +- CometBroadcastHashJoin (25) + : : : : : : :- CometFilter (6) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (5) + : : : : : : +- CometBroadcastExchange (24) + : : : : : : +- CometBroadcastHashJoin (23) + : : : : : : :- CometFilter (8) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (7) + : : : : : : +- CometBroadcastExchange (22) + : : : : : : +- CometProject (21) + : : : : : : +- CometBroadcastHashJoin (20) + : : : : : : :- CometProject (15) + : : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : : :- CometFilter (10) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (9) + : : : : : : : +- CometBroadcastExchange (13) + : : : : : : : +- CometFilter (12) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (11) + : : : : : : +- CometBroadcastExchange (19) + : : : : : : +- CometProject (18) + : : : : : : +- CometFilter (17) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (16) + : : : : : +- ReusedExchange (27) + : : : : +- ReusedExchange (33) + : : : +- CometBroadcastExchange (44) + : : : +- CometBroadcastHashJoin (43) + : : : :- CometFilter (41) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (40) + : : : +- ReusedExchange (42) + : : +- CometBroadcastExchange (50) + : : +- CometProject (49) + : : +- CometFilter (48) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (47) + : :- CometFilter (59) + : : +- CometHashAggregate (58) + : : +- ReusedExchange (57) + : +- CometFilter (62) + : +- CometHashAggregate (61) + : +- ReusedExchange (60) + :- CometHashAggregate (71) + : +- CometExchange (70) + : +- CometHashAggregate (69) + : +- CometHashAggregate (68) + : +- ReusedExchange (67) + :- CometHashAggregate (76) + : +- CometExchange (75) + : +- CometHashAggregate (74) + : +- CometHashAggregate (73) + : +- ReusedExchange (72) + :- CometHashAggregate (81) + : +- CometExchange (80) + : +- CometHashAggregate (79) + : +- CometHashAggregate (78) + : +- ReusedExchange (77) + +- CometHashAggregate (86) + +- CometExchange (85) + +- CometHashAggregate (84) + +- CometHashAggregate (83) + +- ReusedExchange (82) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Arguments: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Arguments: [ss_item_sk#9, ss_sold_date_sk#10] + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Arguments: [cs_item_sk#15, cs_sold_date_sk#16] + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21, d_year#22] + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) ReusedExchange [Reuses operator id: 22] +Output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] + +(34) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#27, i_class_id#28, i_category_id#29] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#27, 0), isnull(i_brand_id#27), coalesce(i_class_id#28, 0), isnull(i_class_id#28), coalesce(i_category_id#29, 0), isnull(i_category_id#29)], LeftSemi, BuildRight + +(35) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(36) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(37) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#30], [i_item_sk#5 AS ss_item_sk#30] + +(38) CometBroadcastExchange +Input [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#30] + +(39) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#30] +Arguments: [ss_item_sk#1], [ss_item_sk#30], LeftSemi, BuildRight + +(40) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(41) CometFilter +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Condition : isnotnull(i_item_sk#31) + +(42) ReusedExchange [Reuses operator id: 38] +Output [1]: [ss_item_sk#30] + +(43) CometBroadcastHashJoin +Left output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [ss_item_sk#30] +Arguments: [i_item_sk#31], [ss_item_sk#30], LeftSemi, BuildRight + +(44) CometBroadcastExchange +Input [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] + +(45) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_item_sk#1], [i_item_sk#31], Inner, BuildRight + +(46) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] + +(47) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#35, d_year#36, d_moy#37] +Arguments: [d_date_sk#35, d_year#36, d_moy#37] + +(48) CometFilter +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2000)) AND (d_moy#37 = 11)) AND isnotnull(d_date_sk#35)) + +(49) CometProject +Input [3]: [d_date_sk#35, d_year#36, d_moy#37] +Arguments: [d_date_sk#35], [d_date_sk#35] + +(50) CometBroadcastExchange +Input [1]: [d_date_sk#35] +Arguments: [d_date_sk#35] + +(51) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34] +Right output [1]: [d_date_sk#35] +Arguments: [ss_sold_date_sk#4], [d_date_sk#35], Inner, BuildRight + +(52) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#32, i_class_id#33, i_category_id#34, d_date_sk#35] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34], [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] + +(53) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#32, i_class_id#33, i_category_id#34] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(54) CometExchange +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#38, isEmpty#39, count#40] +Arguments: hashpartitioning(i_brand_id#32, i_class_id#33, i_category_id#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometHashAggregate +Input [6]: [i_brand_id#32, i_class_id#33, i_category_id#34, sum#38, isEmpty#39, count#40] +Keys [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(56) CometFilter +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sales#42, number_sales#43] +Condition : (isnotnull(sales#42) AND (cast(sales#42 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) + +(57) ReusedExchange [Reuses operator id: 54] +Output [6]: [i_brand_id#46, i_class_id#47, i_category_id#48, sum#49, isEmpty#50, count#51] + +(58) CometHashAggregate +Input [6]: [i_brand_id#46, i_class_id#47, i_category_id#48, sum#49, isEmpty#50, count#51] +Keys [3]: [i_brand_id#46, i_class_id#47, i_category_id#48] +Functions [2]: [sum((cast(cs_quantity#52 as decimal(10,0)) * cs_list_price#53)), count(1)] + +(59) CometFilter +Input [6]: [channel#54, i_brand_id#46, i_class_id#47, i_category_id#48, sales#55, number_sales#56] +Condition : (isnotnull(sales#55) AND (cast(sales#55 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) + +(60) ReusedExchange [Reuses operator id: 54] +Output [6]: [i_brand_id#57, i_class_id#58, i_category_id#59, sum#60, isEmpty#61, count#62] + +(61) CometHashAggregate +Input [6]: [i_brand_id#57, i_class_id#58, i_category_id#59, sum#60, isEmpty#61, count#62] +Keys [3]: [i_brand_id#57, i_class_id#58, i_category_id#59] +Functions [2]: [sum((cast(ws_quantity#63 as decimal(10,0)) * ws_list_price#64)), count(1)] + +(62) CometFilter +Input [6]: [channel#65, i_brand_id#57, i_class_id#58, i_category_id#59, sales#66, number_sales#67] +Condition : (isnotnull(sales#66) AND (cast(sales#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) + +(63) CometUnion +Child 0 Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sales#42, number_sales#43] +Child 1 Input [6]: [channel#54, i_brand_id#46, i_class_id#47, i_category_id#48, sales#55, number_sales#56] +Child 2 Input [6]: [channel#65, i_brand_id#57, i_class_id#58, i_category_id#59, sales#66, number_sales#67] + +(64) CometHashAggregate +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sales#42, number_sales#43] +Keys [4]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] + +(65) CometExchange +Input [7]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum#68, isEmpty#69, sum#70] +Arguments: hashpartitioning(channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(66) CometHashAggregate +Input [7]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum#68, isEmpty#69, sum#70] +Keys [4]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34] +Functions [2]: [sum(sales#42), sum(number_sales#43)] + +(67) ReusedExchange [Reuses operator id: 65] +Output [7]: [channel#41, i_brand_id#71, i_class_id#72, i_category_id#73, sum#68, isEmpty#69, sum#70] + +(68) CometHashAggregate +Input [7]: [channel#41, i_brand_id#71, i_class_id#72, i_category_id#73, sum#68, isEmpty#69, sum#70] +Keys [4]: [channel#41, i_brand_id#71, i_class_id#72, i_category_id#73] +Functions [2]: [sum(sales#42), sum(number_sales#43)] + +(69) CometHashAggregate +Input [5]: [channel#41, i_brand_id#71, i_class_id#72, sum_sales#74, number_sales#75] +Keys [3]: [channel#41, i_brand_id#71, i_class_id#72] +Functions [2]: [partial_sum(sum_sales#74), partial_sum(number_sales#75)] + +(70) CometExchange +Input [6]: [channel#41, i_brand_id#71, i_class_id#72, sum#76, isEmpty#77, sum#78] +Arguments: hashpartitioning(channel#41, i_brand_id#71, i_class_id#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(71) CometHashAggregate +Input [6]: [channel#41, i_brand_id#71, i_class_id#72, sum#76, isEmpty#77, sum#78] +Keys [3]: [channel#41, i_brand_id#71, i_class_id#72] +Functions [2]: [sum(sum_sales#74), sum(number_sales#75)] + +(72) ReusedExchange [Reuses operator id: 65] +Output [7]: [channel#41, i_brand_id#79, i_class_id#80, i_category_id#81, sum#68, isEmpty#69, sum#70] + +(73) CometHashAggregate +Input [7]: [channel#41, i_brand_id#79, i_class_id#80, i_category_id#81, sum#68, isEmpty#69, sum#70] +Keys [4]: [channel#41, i_brand_id#79, i_class_id#80, i_category_id#81] +Functions [2]: [sum(sales#42), sum(number_sales#43)] + +(74) CometHashAggregate +Input [4]: [channel#41, i_brand_id#79, sum_sales#82, number_sales#83] +Keys [2]: [channel#41, i_brand_id#79] +Functions [2]: [partial_sum(sum_sales#82), partial_sum(number_sales#83)] + +(75) CometExchange +Input [5]: [channel#41, i_brand_id#79, sum#84, isEmpty#85, sum#86] +Arguments: hashpartitioning(channel#41, i_brand_id#79, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(76) CometHashAggregate +Input [5]: [channel#41, i_brand_id#79, sum#84, isEmpty#85, sum#86] +Keys [2]: [channel#41, i_brand_id#79] +Functions [2]: [sum(sum_sales#82), sum(number_sales#83)] + +(77) ReusedExchange [Reuses operator id: 65] +Output [7]: [channel#41, i_brand_id#87, i_class_id#88, i_category_id#89, sum#68, isEmpty#69, sum#70] + +(78) CometHashAggregate +Input [7]: [channel#41, i_brand_id#87, i_class_id#88, i_category_id#89, sum#68, isEmpty#69, sum#70] +Keys [4]: [channel#41, i_brand_id#87, i_class_id#88, i_category_id#89] +Functions [2]: [sum(sales#42), sum(number_sales#43)] + +(79) CometHashAggregate +Input [3]: [channel#41, sum_sales#90, number_sales#91] +Keys [1]: [channel#41] +Functions [2]: [partial_sum(sum_sales#90), partial_sum(number_sales#91)] + +(80) CometExchange +Input [4]: [channel#41, sum#92, isEmpty#93, sum#94] +Arguments: hashpartitioning(channel#41, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(81) CometHashAggregate +Input [4]: [channel#41, sum#92, isEmpty#93, sum#94] +Keys [1]: [channel#41] +Functions [2]: [sum(sum_sales#90), sum(number_sales#91)] + +(82) ReusedExchange [Reuses operator id: 65] +Output [7]: [channel#41, i_brand_id#95, i_class_id#96, i_category_id#97, sum#68, isEmpty#69, sum#70] + +(83) CometHashAggregate +Input [7]: [channel#41, i_brand_id#95, i_class_id#96, i_category_id#97, sum#68, isEmpty#69, sum#70] +Keys [4]: [channel#41, i_brand_id#95, i_class_id#96, i_category_id#97] +Functions [2]: [sum(sales#42), sum(number_sales#43)] + +(84) CometHashAggregate +Input [2]: [sum_sales#98, number_sales#99] +Keys: [] +Functions [2]: [partial_sum(sum_sales#98), partial_sum(number_sales#99)] + +(85) CometExchange +Input [3]: [sum#100, isEmpty#101, sum#102] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(86) CometHashAggregate +Input [3]: [sum#100, isEmpty#101, sum#102] +Keys: [] +Functions [2]: [sum(sum_sales#98), sum(number_sales#99)] + +(87) CometUnion +Child 0 Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Child 1 Input [6]: [channel#41, i_brand_id#71, i_class_id#72, i_category_id#105, sum(sum_sales)#106, sum(number_sales)#107] +Child 2 Input [6]: [channel#41, i_brand_id#79, i_class_id#108, i_category_id#109, sum(sum_sales)#110, sum(number_sales)#111] +Child 3 Input [6]: [channel#41, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sum_sales)#115, sum(number_sales)#116] +Child 4 Input [6]: [channel#117, i_brand_id#118, i_class_id#119, i_category_id#120, sum(sum_sales)#121, sum(number_sales)#122] + +(88) CometHashAggregate +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Keys [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Functions: [] + +(89) CometExchange +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Arguments: hashpartitioning(channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(90) CometHashAggregate +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Keys [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Functions: [] + +(91) CometTakeOrderedAndProject +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[channel#41 ASC NULLS FIRST,i_brand_id#32 ASC NULLS FIRST,i_class_id#33 ASC NULLS FIRST,i_category_id#34 ASC NULLS FIRST], output=[channel#41,i_brand_id#32,i_class_id#33,i_category_id#34,sum_sales#103,number_sales#104]), [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104], 100, [channel#41 ASC NULLS FIRST, i_brand_id#32 ASC NULLS FIRST, i_class_id#33 ASC NULLS FIRST, i_category_id#34 ASC NULLS FIRST], [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] + +(92) ColumnarToRow [codegen id : 1] +Input [6]: [channel#41, i_brand_id#32, i_class_id#33, i_category_id#34, sum_sales#103, number_sales#104] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 56 Hosting Expression = Subquery scalar-subquery#44, [id=#45] +* ColumnarToRow (112) ++- CometHashAggregate (111) + +- CometExchange (110) + +- CometHashAggregate (109) + +- CometUnion (108) + :- CometProject (96) + : +- CometBroadcastHashJoin (95) + : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (93) + : +- ReusedExchange (94) + :- CometProject (103) + : +- CometBroadcastHashJoin (102) + : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (97) + : +- CometBroadcastExchange (101) + : +- CometProject (100) + : +- CometFilter (99) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (98) + +- CometProject (107) + +- CometBroadcastHashJoin (106) + :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (104) + +- ReusedExchange (105) + + +(93) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_quantity#123, ss_list_price#124, ss_sold_date_sk#125] +Arguments: [ss_quantity#123, ss_list_price#124, ss_sold_date_sk#125] + +(94) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#126] + +(95) CometBroadcastHashJoin +Left output [3]: [ss_quantity#123, ss_list_price#124, ss_sold_date_sk#125] +Right output [1]: [d_date_sk#126] +Arguments: [ss_sold_date_sk#125], [d_date_sk#126], Inner, BuildRight + +(96) CometProject +Input [4]: [ss_quantity#123, ss_list_price#124, ss_sold_date_sk#125, d_date_sk#126] +Arguments: [quantity#127, list_price#128], [ss_quantity#123 AS quantity#127, ss_list_price#124 AS list_price#128] + +(97) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_quantity#129, cs_list_price#130, cs_sold_date_sk#131] +Arguments: [cs_quantity#129, cs_list_price#130, cs_sold_date_sk#131] + +(98) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#132, d_year#133] +Arguments: [d_date_sk#132, d_year#133] + +(99) CometFilter +Input [2]: [d_date_sk#132, d_year#133] +Condition : (((isnotnull(d_year#133) AND (d_year#133 >= 1998)) AND (d_year#133 <= 2000)) AND isnotnull(d_date_sk#132)) + +(100) CometProject +Input [2]: [d_date_sk#132, d_year#133] +Arguments: [d_date_sk#132], [d_date_sk#132] + +(101) CometBroadcastExchange +Input [1]: [d_date_sk#132] +Arguments: [d_date_sk#132] + +(102) CometBroadcastHashJoin +Left output [3]: [cs_quantity#129, cs_list_price#130, cs_sold_date_sk#131] +Right output [1]: [d_date_sk#132] +Arguments: [cs_sold_date_sk#131], [d_date_sk#132], Inner, BuildRight + +(103) CometProject +Input [4]: [cs_quantity#129, cs_list_price#130, cs_sold_date_sk#131, d_date_sk#132] +Arguments: [quantity#134, list_price#135], [cs_quantity#129 AS quantity#134, cs_list_price#130 AS list_price#135] + +(104) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_quantity#136, ws_list_price#137, ws_sold_date_sk#138] +Arguments: [ws_quantity#136, ws_list_price#137, ws_sold_date_sk#138] + +(105) ReusedExchange [Reuses operator id: 101] +Output [1]: [d_date_sk#139] + +(106) CometBroadcastHashJoin +Left output [3]: [ws_quantity#136, ws_list_price#137, ws_sold_date_sk#138] +Right output [1]: [d_date_sk#139] +Arguments: [ws_sold_date_sk#138], [d_date_sk#139], Inner, BuildRight + +(107) CometProject +Input [4]: [ws_quantity#136, ws_list_price#137, ws_sold_date_sk#138, d_date_sk#139] +Arguments: [quantity#140, list_price#141], [ws_quantity#136 AS quantity#140, ws_list_price#137 AS list_price#141] + +(108) CometUnion +Child 0 Input [2]: [quantity#127, list_price#128] +Child 1 Input [2]: [quantity#134, list_price#135] +Child 2 Input [2]: [quantity#140, list_price#141] + +(109) CometHashAggregate +Input [2]: [quantity#127, list_price#128] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#127 as decimal(10,0)) * list_price#128))] + +(110) CometExchange +Input [2]: [sum#142, count#143] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(111) CometHashAggregate +Input [2]: [sum#142, count#143] +Keys: [] +Functions [1]: [avg((cast(quantity#127 as decimal(10,0)) * list_price#128))] + +(112) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#144] + +Subquery:2 Hosting operator id = 59 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] + +Subquery:3 Hosting operator id = 62 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9119b51ec8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_datafusion/simplified.txt @@ -0,0 +1,119 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometExchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometUnion [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + CometExchange [channel,i_brand_id,i_class_id,i_category_id] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum,sales,number_sales] + CometUnion [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #13 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #10 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #14 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #14 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #3 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #4 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #5 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #6 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #8 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #10 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk] #10 + ReusedExchange [i_brand_id,i_class_id,i_category_id] #8 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #4 + CometBroadcastExchange [d_date_sk] #12 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1)] + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1)] + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange [channel,i_brand_id,i_class_id] #15 + CometHashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange [channel,i_brand_id] #16 + CometHashAggregate [channel,i_brand_id,sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,sum_sales,number_sales,i_class_id,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange [channel] #17 + CometHashAggregate [channel,sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [channel,sum_sales,number_sales,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange #18 + CometHashAggregate [sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [sum_sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e5804c5048 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/explain.txt @@ -0,0 +1,812 @@ +== Physical Plan == +* ColumnarToRow (122) ++- CometTakeOrderedAndProject (121) + +- CometHashAggregate (120) + +- CometExchange (119) + +- CometHashAggregate (118) + +- CometUnion (117) + :- CometHashAggregate (96) + : +- CometExchange (95) + : +- CometHashAggregate (94) + : +- CometUnion (93) + : :- CometFilter (64) + : : +- CometHashAggregate (63) + : : +- CometExchange (62) + : : +- CometHashAggregate (61) + : : +- CometProject (60) + : : +- CometBroadcastHashJoin (59) + : : :- CometProject (54) + : : : +- CometBroadcastHashJoin (53) + : : : :- CometBroadcastHashJoin (47) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometBroadcastExchange (46) + : : : : +- CometProject (45) + : : : : +- CometBroadcastHashJoin (44) + : : : : :- CometFilter (4) + : : : : : +- CometScan parquet spark_catalog.default.item (3) + : : : : +- CometBroadcastExchange (43) + : : : : +- CometBroadcastHashJoin (42) + : : : : :- CometHashAggregate (32) + : : : : : +- CometExchange (31) + : : : : : +- CometHashAggregate (30) + : : : : : +- CometProject (29) + : : : : : +- CometBroadcastHashJoin (28) + : : : : : :- CometProject (26) + : : : : : : +- CometBroadcastHashJoin (25) + : : : : : : :- CometFilter (6) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (5) + : : : : : : +- CometBroadcastExchange (24) + : : : : : : +- CometBroadcastHashJoin (23) + : : : : : : :- CometFilter (8) + : : : : : : : +- CometScan parquet spark_catalog.default.item (7) + : : : : : : +- CometBroadcastExchange (22) + : : : : : : +- CometProject (21) + : : : : : : +- CometBroadcastHashJoin (20) + : : : : : : :- CometProject (15) + : : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : : :- CometFilter (10) + : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (9) + : : : : : : : +- CometBroadcastExchange (13) + : : : : : : : +- CometFilter (12) + : : : : : : : +- CometScan parquet spark_catalog.default.item (11) + : : : : : : +- CometBroadcastExchange (19) + : : : : : : +- CometProject (18) + : : : : : : +- CometFilter (17) + : : : : : : +- CometScan parquet spark_catalog.default.date_dim (16) + : : : : : +- ReusedExchange (27) + : : : : +- CometBroadcastExchange (41) + : : : : +- CometProject (40) + : : : : +- CometBroadcastHashJoin (39) + : : : : :- CometProject (37) + : : : : : +- CometBroadcastHashJoin (36) + : : : : : :- CometFilter (34) + : : : : : : +- CometScan parquet spark_catalog.default.web_sales (33) + : : : : : +- ReusedExchange (35) + : : : : +- ReusedExchange (38) + : : : +- CometBroadcastExchange (52) + : : : +- CometBroadcastHashJoin (51) + : : : :- CometFilter (49) + : : : : +- CometScan parquet spark_catalog.default.item (48) + : : : +- ReusedExchange (50) + : : +- CometBroadcastExchange (58) + : : +- CometProject (57) + : : +- CometFilter (56) + : : +- CometScan parquet spark_catalog.default.date_dim (55) + : :- CometFilter (78) + : : +- CometHashAggregate (77) + : : +- CometExchange (76) + : : +- CometHashAggregate (75) + : : +- CometProject (74) + : : +- CometBroadcastHashJoin (73) + : : :- CometProject (71) + : : : +- CometBroadcastHashJoin (70) + : : : :- CometBroadcastHashJoin (68) + : : : : :- CometFilter (66) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (65) + : : : : +- ReusedExchange (67) + : : : +- ReusedExchange (69) + : : +- ReusedExchange (72) + : +- CometFilter (92) + : +- CometHashAggregate (91) + : +- CometExchange (90) + : +- CometHashAggregate (89) + : +- CometProject (88) + : +- CometBroadcastHashJoin (87) + : :- CometProject (85) + : : +- CometBroadcastHashJoin (84) + : : :- CometBroadcastHashJoin (82) + : : : :- CometFilter (80) + : : : : +- CometScan parquet spark_catalog.default.web_sales (79) + : : : +- ReusedExchange (81) + : : +- ReusedExchange (83) + : +- ReusedExchange (86) + :- CometHashAggregate (101) + : +- CometExchange (100) + : +- CometHashAggregate (99) + : +- CometHashAggregate (98) + : +- ReusedExchange (97) + :- CometHashAggregate (106) + : +- CometExchange (105) + : +- CometHashAggregate (104) + : +- CometHashAggregate (103) + : +- ReusedExchange (102) + :- CometHashAggregate (111) + : +- CometExchange (110) + : +- CometHashAggregate (109) + : +- CometHashAggregate (108) + : +- ReusedExchange (107) + +- CometHashAggregate (116) + +- CometExchange (115) + +- CometHashAggregate (114) + +- CometHashAggregate (113) + +- ReusedExchange (112) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(5) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(7) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(8) CometFilter +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(9) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(11) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) CometFilter +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(13) CometBroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(14) CometBroadcastHashJoin +Left output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Right output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_item_sk#15], [i_item_sk#17], Inner, BuildRight + +(15) CometProject +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20], [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] + +(16) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) CometFilter +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(18) CometProject +Input [2]: [d_date_sk#21, d_year#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(19) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(20) CometBroadcastHashJoin +Left output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Right output [1]: [d_date_sk#21] +Arguments: [cs_sold_date_sk#16], [d_date_sk#21], Inner, BuildRight + +(21) CometProject +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20], [i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) CometBroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [i_brand_id#18, i_class_id#19, i_category_id#20] + +(23) CometBroadcastHashJoin +Left output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)], [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)], LeftSemi, BuildRight + +(24) CometBroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(25) CometBroadcastHashJoin +Left output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Right output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_item_sk#9], [i_item_sk#11], Inner, BuildRight + +(26) CometProject +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14], [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] + +(27) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#23] + +(28) CometBroadcastHashJoin +Left output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Right output [1]: [d_date_sk#23] +Arguments: [ss_sold_date_sk#10], [d_date_sk#23], Inner, BuildRight + +(29) CometProject +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] +Arguments: [brand_id#24, class_id#25, category_id#26], [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] + +(30) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(31) CometExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(32) CometHashAggregate +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] + +(33) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(35) ReusedExchange [Reuses operator id: 13] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(36) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Right output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_item_sk#27], [i_item_sk#29], Inner, BuildRight + +(37) CometProject +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32], [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] + +(38) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#33] + +(39) CometBroadcastHashJoin +Left output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Right output [1]: [d_date_sk#33] +Arguments: [ws_sold_date_sk#28], [d_date_sk#33], Inner, BuildRight + +(40) CometProject +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32], [i_brand_id#30, i_class_id#31, i_category_id#32] + +(41) CometBroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [i_brand_id#30, i_class_id#31, i_category_id#32] + +(42) CometBroadcastHashJoin +Left output [3]: [brand_id#24, class_id#25, category_id#26] +Right output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)], [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)], LeftSemi, BuildRight + +(43) CometBroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [brand_id#24, class_id#25, category_id#26] + +(44) CometBroadcastHashJoin +Left output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Right output [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: [i_brand_id#6, i_class_id#7, i_category_id#8], [brand_id#24, class_id#25, category_id#26], Inner, BuildRight + +(45) CometProject +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] +Arguments: [ss_item_sk#34], [i_item_sk#5 AS ss_item_sk#34] + +(46) CometBroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#34] + +(47) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [1]: [ss_item_sk#34] +Arguments: [ss_item_sk#1], [ss_item_sk#34], LeftSemi, BuildRight + +(48) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(49) CometFilter +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : isnotnull(i_item_sk#35) + +(50) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#34] + +(51) CometBroadcastHashJoin +Left output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [ss_item_sk#34] +Arguments: [i_item_sk#35], [ss_item_sk#34], LeftSemi, BuildRight + +(52) CometBroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(53) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Right output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_item_sk#1], [i_item_sk#35], Inner, BuildRight + +(54) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] + +(55) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#39, d_year#40, d_moy#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(56) CometFilter +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_year#40) AND isnotnull(d_moy#41)) AND (d_year#40 = 2000)) AND (d_moy#41 = 11)) AND isnotnull(d_date_sk#39)) + +(57) CometProject +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Arguments: [d_date_sk#39], [d_date_sk#39] + +(58) CometBroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: [d_date_sk#39] + +(59) CometBroadcastHashJoin +Left output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Right output [1]: [d_date_sk#39] +Arguments: [ss_sold_date_sk#4], [d_date_sk#39], Inner, BuildRight + +(60) CometProject +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] +Arguments: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38], [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] + +(61) CometHashAggregate +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] + +(62) CometExchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#42, isEmpty#43, count#44] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(63) CometHashAggregate +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#42, isEmpty#43, count#44] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] + +(64) CometFilter +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sales#46, number_sales#47] +Condition : (isnotnull(sales#46) AND (cast(sales#46 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(65) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#50, cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#53)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(66) CometFilter +Input [4]: [cs_item_sk#50, cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53] +Condition : isnotnull(cs_item_sk#50) + +(67) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#54] + +(68) CometBroadcastHashJoin +Left output [4]: [cs_item_sk#50, cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53] +Right output [1]: [ss_item_sk#54] +Arguments: [cs_item_sk#50], [ss_item_sk#54], LeftSemi, BuildRight + +(69) ReusedExchange [Reuses operator id: 52] +Output [4]: [i_item_sk#55, i_brand_id#56, i_class_id#57, i_category_id#58] + +(70) CometBroadcastHashJoin +Left output [4]: [cs_item_sk#50, cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53] +Right output [4]: [i_item_sk#55, i_brand_id#56, i_class_id#57, i_category_id#58] +Arguments: [cs_item_sk#50], [i_item_sk#55], Inner, BuildRight + +(71) CometProject +Input [8]: [cs_item_sk#50, cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53, i_item_sk#55, i_brand_id#56, i_class_id#57, i_category_id#58] +Arguments: [cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53, i_brand_id#56, i_class_id#57, i_category_id#58], [cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53, i_brand_id#56, i_class_id#57, i_category_id#58] + +(72) ReusedExchange [Reuses operator id: 58] +Output [1]: [d_date_sk#59] + +(73) CometBroadcastHashJoin +Left output [6]: [cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53, i_brand_id#56, i_class_id#57, i_category_id#58] +Right output [1]: [d_date_sk#59] +Arguments: [cs_sold_date_sk#53], [d_date_sk#59], Inner, BuildRight + +(74) CometProject +Input [7]: [cs_quantity#51, cs_list_price#52, cs_sold_date_sk#53, i_brand_id#56, i_class_id#57, i_category_id#58, d_date_sk#59] +Arguments: [cs_quantity#51, cs_list_price#52, i_brand_id#56, i_class_id#57, i_category_id#58], [cs_quantity#51, cs_list_price#52, i_brand_id#56, i_class_id#57, i_category_id#58] + +(75) CometHashAggregate +Input [5]: [cs_quantity#51, cs_list_price#52, i_brand_id#56, i_class_id#57, i_category_id#58] +Keys [3]: [i_brand_id#56, i_class_id#57, i_category_id#58] +Functions [2]: [partial_sum((cast(cs_quantity#51 as decimal(10,0)) * cs_list_price#52)), partial_count(1)] + +(76) CometExchange +Input [6]: [i_brand_id#56, i_class_id#57, i_category_id#58, sum#60, isEmpty#61, count#62] +Arguments: hashpartitioning(i_brand_id#56, i_class_id#57, i_category_id#58, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(77) CometHashAggregate +Input [6]: [i_brand_id#56, i_class_id#57, i_category_id#58, sum#60, isEmpty#61, count#62] +Keys [3]: [i_brand_id#56, i_class_id#57, i_category_id#58] +Functions [2]: [sum((cast(cs_quantity#51 as decimal(10,0)) * cs_list_price#52)), count(1)] + +(78) CometFilter +Input [6]: [channel#63, i_brand_id#56, i_class_id#57, i_category_id#58, sales#64, number_sales#65] +Condition : (isnotnull(sales#64) AND (cast(sales#64 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(79) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#66, ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#69)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(80) CometFilter +Input [4]: [ws_item_sk#66, ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69] +Condition : isnotnull(ws_item_sk#66) + +(81) ReusedExchange [Reuses operator id: 46] +Output [1]: [ss_item_sk#70] + +(82) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#66, ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69] +Right output [1]: [ss_item_sk#70] +Arguments: [ws_item_sk#66], [ss_item_sk#70], LeftSemi, BuildRight + +(83) ReusedExchange [Reuses operator id: 52] +Output [4]: [i_item_sk#71, i_brand_id#72, i_class_id#73, i_category_id#74] + +(84) CometBroadcastHashJoin +Left output [4]: [ws_item_sk#66, ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69] +Right output [4]: [i_item_sk#71, i_brand_id#72, i_class_id#73, i_category_id#74] +Arguments: [ws_item_sk#66], [i_item_sk#71], Inner, BuildRight + +(85) CometProject +Input [8]: [ws_item_sk#66, ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69, i_item_sk#71, i_brand_id#72, i_class_id#73, i_category_id#74] +Arguments: [ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69, i_brand_id#72, i_class_id#73, i_category_id#74], [ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69, i_brand_id#72, i_class_id#73, i_category_id#74] + +(86) ReusedExchange [Reuses operator id: 58] +Output [1]: [d_date_sk#75] + +(87) CometBroadcastHashJoin +Left output [6]: [ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69, i_brand_id#72, i_class_id#73, i_category_id#74] +Right output [1]: [d_date_sk#75] +Arguments: [ws_sold_date_sk#69], [d_date_sk#75], Inner, BuildRight + +(88) CometProject +Input [7]: [ws_quantity#67, ws_list_price#68, ws_sold_date_sk#69, i_brand_id#72, i_class_id#73, i_category_id#74, d_date_sk#75] +Arguments: [ws_quantity#67, ws_list_price#68, i_brand_id#72, i_class_id#73, i_category_id#74], [ws_quantity#67, ws_list_price#68, i_brand_id#72, i_class_id#73, i_category_id#74] + +(89) CometHashAggregate +Input [5]: [ws_quantity#67, ws_list_price#68, i_brand_id#72, i_class_id#73, i_category_id#74] +Keys [3]: [i_brand_id#72, i_class_id#73, i_category_id#74] +Functions [2]: [partial_sum((cast(ws_quantity#67 as decimal(10,0)) * ws_list_price#68)), partial_count(1)] + +(90) CometExchange +Input [6]: [i_brand_id#72, i_class_id#73, i_category_id#74, sum#76, isEmpty#77, count#78] +Arguments: hashpartitioning(i_brand_id#72, i_class_id#73, i_category_id#74, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(91) CometHashAggregate +Input [6]: [i_brand_id#72, i_class_id#73, i_category_id#74, sum#76, isEmpty#77, count#78] +Keys [3]: [i_brand_id#72, i_class_id#73, i_category_id#74] +Functions [2]: [sum((cast(ws_quantity#67 as decimal(10,0)) * ws_list_price#68)), count(1)] + +(92) CometFilter +Input [6]: [channel#79, i_brand_id#72, i_class_id#73, i_category_id#74, sales#80, number_sales#81] +Condition : (isnotnull(sales#80) AND (cast(sales#80 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) + +(93) CometUnion +Child 0 Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sales#46, number_sales#47] +Child 1 Input [6]: [channel#63, i_brand_id#56, i_class_id#57, i_category_id#58, sales#64, number_sales#65] +Child 2 Input [6]: [channel#79, i_brand_id#72, i_class_id#73, i_category_id#74, sales#80, number_sales#81] + +(94) CometHashAggregate +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sales#46, number_sales#47] +Keys [4]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum(sales#46), partial_sum(number_sales#47)] + +(95) CometExchange +Input [7]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum#82, isEmpty#83, sum#84] +Arguments: hashpartitioning(channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(96) CometHashAggregate +Input [7]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum#82, isEmpty#83, sum#84] +Keys [4]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#46), sum(number_sales#47)] + +(97) ReusedExchange [Reuses operator id: 95] +Output [7]: [channel#45, i_brand_id#85, i_class_id#86, i_category_id#87, sum#82, isEmpty#83, sum#84] + +(98) CometHashAggregate +Input [7]: [channel#45, i_brand_id#85, i_class_id#86, i_category_id#87, sum#82, isEmpty#83, sum#84] +Keys [4]: [channel#45, i_brand_id#85, i_class_id#86, i_category_id#87] +Functions [2]: [sum(sales#46), sum(number_sales#47)] + +(99) CometHashAggregate +Input [5]: [channel#45, i_brand_id#85, i_class_id#86, sum_sales#88, number_sales#89] +Keys [3]: [channel#45, i_brand_id#85, i_class_id#86] +Functions [2]: [partial_sum(sum_sales#88), partial_sum(number_sales#89)] + +(100) CometExchange +Input [6]: [channel#45, i_brand_id#85, i_class_id#86, sum#90, isEmpty#91, sum#92] +Arguments: hashpartitioning(channel#45, i_brand_id#85, i_class_id#86, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(101) CometHashAggregate +Input [6]: [channel#45, i_brand_id#85, i_class_id#86, sum#90, isEmpty#91, sum#92] +Keys [3]: [channel#45, i_brand_id#85, i_class_id#86] +Functions [2]: [sum(sum_sales#88), sum(number_sales#89)] + +(102) ReusedExchange [Reuses operator id: 95] +Output [7]: [channel#45, i_brand_id#93, i_class_id#94, i_category_id#95, sum#82, isEmpty#83, sum#84] + +(103) CometHashAggregate +Input [7]: [channel#45, i_brand_id#93, i_class_id#94, i_category_id#95, sum#82, isEmpty#83, sum#84] +Keys [4]: [channel#45, i_brand_id#93, i_class_id#94, i_category_id#95] +Functions [2]: [sum(sales#46), sum(number_sales#47)] + +(104) CometHashAggregate +Input [4]: [channel#45, i_brand_id#93, sum_sales#96, number_sales#97] +Keys [2]: [channel#45, i_brand_id#93] +Functions [2]: [partial_sum(sum_sales#96), partial_sum(number_sales#97)] + +(105) CometExchange +Input [5]: [channel#45, i_brand_id#93, sum#98, isEmpty#99, sum#100] +Arguments: hashpartitioning(channel#45, i_brand_id#93, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(106) CometHashAggregate +Input [5]: [channel#45, i_brand_id#93, sum#98, isEmpty#99, sum#100] +Keys [2]: [channel#45, i_brand_id#93] +Functions [2]: [sum(sum_sales#96), sum(number_sales#97)] + +(107) ReusedExchange [Reuses operator id: 95] +Output [7]: [channel#45, i_brand_id#101, i_class_id#102, i_category_id#103, sum#82, isEmpty#83, sum#84] + +(108) CometHashAggregate +Input [7]: [channel#45, i_brand_id#101, i_class_id#102, i_category_id#103, sum#82, isEmpty#83, sum#84] +Keys [4]: [channel#45, i_brand_id#101, i_class_id#102, i_category_id#103] +Functions [2]: [sum(sales#46), sum(number_sales#47)] + +(109) CometHashAggregate +Input [3]: [channel#45, sum_sales#104, number_sales#105] +Keys [1]: [channel#45] +Functions [2]: [partial_sum(sum_sales#104), partial_sum(number_sales#105)] + +(110) CometExchange +Input [4]: [channel#45, sum#106, isEmpty#107, sum#108] +Arguments: hashpartitioning(channel#45, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(111) CometHashAggregate +Input [4]: [channel#45, sum#106, isEmpty#107, sum#108] +Keys [1]: [channel#45] +Functions [2]: [sum(sum_sales#104), sum(number_sales#105)] + +(112) ReusedExchange [Reuses operator id: 95] +Output [7]: [channel#45, i_brand_id#109, i_class_id#110, i_category_id#111, sum#82, isEmpty#83, sum#84] + +(113) CometHashAggregate +Input [7]: [channel#45, i_brand_id#109, i_class_id#110, i_category_id#111, sum#82, isEmpty#83, sum#84] +Keys [4]: [channel#45, i_brand_id#109, i_class_id#110, i_category_id#111] +Functions [2]: [sum(sales#46), sum(number_sales#47)] + +(114) CometHashAggregate +Input [2]: [sum_sales#112, number_sales#113] +Keys: [] +Functions [2]: [partial_sum(sum_sales#112), partial_sum(number_sales#113)] + +(115) CometExchange +Input [3]: [sum#114, isEmpty#115, sum#116] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(116) CometHashAggregate +Input [3]: [sum#114, isEmpty#115, sum#116] +Keys: [] +Functions [2]: [sum(sum_sales#112), sum(number_sales#113)] + +(117) CometUnion +Child 0 Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Child 1 Input [6]: [channel#45, i_brand_id#85, i_class_id#86, i_category_id#119, sum(sum_sales)#120, sum(number_sales)#121] +Child 2 Input [6]: [channel#45, i_brand_id#93, i_class_id#122, i_category_id#123, sum(sum_sales)#124, sum(number_sales)#125] +Child 3 Input [6]: [channel#45, i_brand_id#126, i_class_id#127, i_category_id#128, sum(sum_sales)#129, sum(number_sales)#130] +Child 4 Input [6]: [channel#131, i_brand_id#132, i_class_id#133, i_category_id#134, sum(sum_sales)#135, sum(number_sales)#136] + +(118) CometHashAggregate +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Keys [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Functions: [] + +(119) CometExchange +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Arguments: hashpartitioning(channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=10] + +(120) CometHashAggregate +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Keys [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Functions: [] + +(121) CometTakeOrderedAndProject +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[channel#45 ASC NULLS FIRST,i_brand_id#36 ASC NULLS FIRST,i_class_id#37 ASC NULLS FIRST,i_category_id#38 ASC NULLS FIRST], output=[channel#45,i_brand_id#36,i_class_id#37,i_category_id#38,sum_sales#117,number_sales#118]), [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118], 100, [channel#45 ASC NULLS FIRST, i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] + +(122) ColumnarToRow [codegen id : 1] +Input [6]: [channel#45, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#117, number_sales#118] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +* ColumnarToRow (142) ++- CometHashAggregate (141) + +- CometExchange (140) + +- CometHashAggregate (139) + +- CometUnion (138) + :- CometProject (126) + : +- CometBroadcastHashJoin (125) + : :- CometScan parquet spark_catalog.default.store_sales (123) + : +- ReusedExchange (124) + :- CometProject (133) + : +- CometBroadcastHashJoin (132) + : :- CometScan parquet spark_catalog.default.catalog_sales (127) + : +- CometBroadcastExchange (131) + : +- CometProject (130) + : +- CometFilter (129) + : +- CometScan parquet spark_catalog.default.date_dim (128) + +- CometProject (137) + +- CometBroadcastHashJoin (136) + :- CometScan parquet spark_catalog.default.web_sales (134) + +- ReusedExchange (135) + + +(123) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#137, ss_list_price#138, ss_sold_date_sk#139] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#139)] +ReadSchema: struct + +(124) ReusedExchange [Reuses operator id: 19] +Output [1]: [d_date_sk#140] + +(125) CometBroadcastHashJoin +Left output [3]: [ss_quantity#137, ss_list_price#138, ss_sold_date_sk#139] +Right output [1]: [d_date_sk#140] +Arguments: [ss_sold_date_sk#139], [d_date_sk#140], Inner, BuildRight + +(126) CometProject +Input [4]: [ss_quantity#137, ss_list_price#138, ss_sold_date_sk#139, d_date_sk#140] +Arguments: [quantity#141, list_price#142], [ss_quantity#137 AS quantity#141, ss_list_price#138 AS list_price#142] + +(127) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#143, cs_list_price#144, cs_sold_date_sk#145] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#145)] +ReadSchema: struct + +(128) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#146, d_year#147] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(129) CometFilter +Input [2]: [d_date_sk#146, d_year#147] +Condition : (((isnotnull(d_year#147) AND (d_year#147 >= 1998)) AND (d_year#147 <= 2000)) AND isnotnull(d_date_sk#146)) + +(130) CometProject +Input [2]: [d_date_sk#146, d_year#147] +Arguments: [d_date_sk#146], [d_date_sk#146] + +(131) CometBroadcastExchange +Input [1]: [d_date_sk#146] +Arguments: [d_date_sk#146] + +(132) CometBroadcastHashJoin +Left output [3]: [cs_quantity#143, cs_list_price#144, cs_sold_date_sk#145] +Right output [1]: [d_date_sk#146] +Arguments: [cs_sold_date_sk#145], [d_date_sk#146], Inner, BuildRight + +(133) CometProject +Input [4]: [cs_quantity#143, cs_list_price#144, cs_sold_date_sk#145, d_date_sk#146] +Arguments: [quantity#148, list_price#149], [cs_quantity#143 AS quantity#148, cs_list_price#144 AS list_price#149] + +(134) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#150, ws_list_price#151, ws_sold_date_sk#152] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#152)] +ReadSchema: struct + +(135) ReusedExchange [Reuses operator id: 131] +Output [1]: [d_date_sk#153] + +(136) CometBroadcastHashJoin +Left output [3]: [ws_quantity#150, ws_list_price#151, ws_sold_date_sk#152] +Right output [1]: [d_date_sk#153] +Arguments: [ws_sold_date_sk#152], [d_date_sk#153], Inner, BuildRight + +(137) CometProject +Input [4]: [ws_quantity#150, ws_list_price#151, ws_sold_date_sk#152, d_date_sk#153] +Arguments: [quantity#154, list_price#155], [ws_quantity#150 AS quantity#154, ws_list_price#151 AS list_price#155] + +(138) CometUnion +Child 0 Input [2]: [quantity#141, list_price#142] +Child 1 Input [2]: [quantity#148, list_price#149] +Child 2 Input [2]: [quantity#154, list_price#155] + +(139) CometHashAggregate +Input [2]: [quantity#141, list_price#142] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#141 as decimal(10,0)) * list_price#142))] + +(140) CometExchange +Input [2]: [sum#156, count#157] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=11] + +(141) CometHashAggregate +Input [2]: [sum#156, count#157] +Keys: [] +Functions [1]: [avg((cast(quantity#141 as decimal(10,0)) * list_price#142))] + +(142) ColumnarToRow [codegen id : 1] +Input [1]: [average_sales#158] + +Subquery:2 Hosting operator id = 78 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] + +Subquery:3 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c456c5c096 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q14a.native_iceberg_compat/simplified.txt @@ -0,0 +1,149 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometExchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometUnion [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + CometExchange [channel,i_brand_id,i_class_id,i_category_id] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum,sales,number_sales] + CometUnion [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [average_sales,sum,count,avg((cast(quantity as decimal(10,0)) * list_price))] + CometExchange #14 + CometHashAggregate [sum,count,quantity,list_price] + CometUnion [quantity,list_price] + CometProject [ss_quantity,ss_list_price] [quantity,list_price] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + ReusedExchange [d_date_sk] #10 + CometProject [cs_quantity,cs_list_price] [quantity,list_price] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk] #15 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometProject [ws_quantity,ws_list_price] [quantity,list_price] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [d_date_sk] #15 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #3 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ss_quantity,ss_list_price] + CometProject [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk,ss_item_sk] + CometFilter [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + CometBroadcastExchange [ss_item_sk] #4 + CometProject [i_item_sk] [ss_item_sk] + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [brand_id,class_id,category_id] #5 + CometBroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + CometHashAggregate [brand_id,class_id,category_id] + CometExchange [brand_id,class_id,category_id] #6 + CometHashAggregate [brand_id,class_id,category_id] + CometProject [i_brand_id,i_class_id,i_category_id] [brand_id,class_id,category_id] + CometBroadcastHashJoin [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ss_item_sk,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ss_item_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #8 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [cs_item_sk,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastExchange [d_date_sk] #10 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [d_date_sk] #10 + CometBroadcastExchange [i_brand_id,i_class_id,i_category_id] #11 + CometProject [i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometFilter [ws_item_sk,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + ReusedExchange [d_date_sk] #10 + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + CometBroadcastHashJoin [i_item_sk,i_brand_id,i_class_id,i_category_id,ss_item_sk] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + ReusedExchange [ss_item_sk] #4 + CometBroadcastExchange [d_date_sk] #13 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #16 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,cs_quantity,cs_list_price] + CometProject [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk,ss_item_sk] + CometFilter [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + ReusedExchange [ss_item_sk] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + ReusedExchange [d_date_sk] #13 + CometFilter [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + ReusedSubquery [average_sales] #1 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,sum,isEmpty,count,sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1)] + CometExchange [i_brand_id,i_class_id,i_category_id] #17 + CometHashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count,ws_quantity,ws_list_price] + CometProject [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,d_date_sk] + CometProject [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk,ss_item_sk] + CometFilter [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + ReusedExchange [ss_item_sk] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + ReusedExchange [d_date_sk] #13 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange [channel,i_brand_id,i_class_id] #18 + CometHashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange [channel,i_brand_id] #19 + CometHashAggregate [channel,i_brand_id,sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [channel,i_brand_id,sum_sales,number_sales,i_class_id,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange [channel] #20 + CometHashAggregate [channel,sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [channel,sum_sales,number_sales,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + CometHashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum,sum(sum_sales),sum(number_salesL)] + CometExchange #21 + CometHashAggregate [sum,isEmpty,sum,sum_sales,number_sales] + CometHashAggregate [sum_sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum,sum(sales),sum(number_salesL)] + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/explain.txt new file mode 100644 index 0000000000..bbf7360e93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/explain.txt @@ -0,0 +1,729 @@ +== Physical Plan == +* ColumnarToRow (141) ++- CometTakeOrderedAndProject (140) + +- CometUnion (139) + :- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (19) + : : : : +- CometBroadcastHashJoin (18) + : : : : :- CometProject (14) + : : : : : +- CometBroadcastHashJoin (13) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : : +- CometBroadcastExchange (6) + : : : : : : +- CometProject (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : : : : +- CometBroadcastExchange (12) + : : : : : +- CometProject (11) + : : : : : +- CometFilter (10) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (9) + : : : : +- CometBroadcastExchange (17) + : : : : +- CometFilter (16) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (15) + : : : +- CometBroadcastExchange (22) + : : : +- CometFilter (21) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (20) + : : +- CometBroadcastExchange (28) + : : +- CometProject (27) + : : +- CometFilter (26) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (25) + : +- CometBroadcastExchange (33) + : +- CometFilter (32) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (31) + :- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (57) + : : +- CometBroadcastHashJoin (56) + : : :- CometProject (54) + : : : +- CometBroadcastHashJoin (53) + : : : :- CometProject (49) + : : : : +- CometBroadcastHashJoin (48) + : : : : :- CometProject (46) + : : : : : +- CometBroadcastHashJoin (45) + : : : : : :- CometProject (43) + : : : : : : +- CometBroadcastHashJoin (42) + : : : : : : :- CometFilter (40) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (39) + : : : : : : +- ReusedExchange (41) + : : : : : +- ReusedExchange (44) + : : : : +- ReusedExchange (47) + : : : +- CometBroadcastExchange (52) + : : : +- CometFilter (51) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (50) + : : +- ReusedExchange (55) + : +- ReusedExchange (58) + :- CometHashAggregate (89) + : +- CometExchange (88) + : +- CometHashAggregate (87) + : +- CometProject (86) + : +- CometBroadcastHashJoin (85) + : :- CometProject (83) + : : +- CometBroadcastHashJoin (82) + : : :- CometProject (80) + : : : +- CometBroadcastHashJoin (79) + : : : :- CometProject (74) + : : : : +- CometBroadcastHashJoin (73) + : : : : :- CometProject (71) + : : : : : +- CometBroadcastHashJoin (70) + : : : : : :- CometProject (68) + : : : : : : +- CometBroadcastHashJoin (67) + : : : : : : :- CometFilter (65) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (64) + : : : : : : +- ReusedExchange (66) + : : : : : +- ReusedExchange (69) + : : : : +- ReusedExchange (72) + : : : +- CometBroadcastExchange (78) + : : : +- CometProject (77) + : : : +- CometFilter (76) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (75) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + :- CometHashAggregate (115) + : +- CometExchange (114) + : +- CometHashAggregate (113) + : +- CometProject (112) + : +- CometBroadcastHashJoin (111) + : :- CometProject (109) + : : +- CometBroadcastHashJoin (108) + : : :- CometProject (106) + : : : +- CometBroadcastHashJoin (105) + : : : :- CometProject (100) + : : : : +- CometBroadcastHashJoin (99) + : : : : :- CometProject (97) + : : : : : +- CometBroadcastHashJoin (96) + : : : : : :- CometProject (94) + : : : : : : +- CometBroadcastHashJoin (93) + : : : : : : :- CometFilter (91) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (90) + : : : : : : +- ReusedExchange (92) + : : : : : +- ReusedExchange (95) + : : : : +- ReusedExchange (98) + : : : +- CometBroadcastExchange (104) + : : : +- CometProject (103) + : : : +- CometFilter (102) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (101) + : : +- ReusedExchange (107) + : +- ReusedExchange (110) + +- CometHashAggregate (138) + +- CometExchange (137) + +- CometHashAggregate (136) + +- CometProject (135) + +- CometBroadcastHashJoin (134) + :- CometProject (132) + : +- CometBroadcastHashJoin (131) + : :- CometProject (129) + : : +- CometBroadcastHashJoin (128) + : : :- CometProject (126) + : : : +- CometBroadcastHashJoin (125) + : : : :- CometProject (123) + : : : : +- CometBroadcastHashJoin (122) + : : : : :- CometProject (120) + : : : : : +- CometBroadcastHashJoin (119) + : : : : : :- CometFilter (117) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (116) + : : : : : +- ReusedExchange (118) + : : : : +- ReusedExchange (121) + : : : +- ReusedExchange (124) + : : +- ReusedExchange (127) + : +- ReusedExchange (130) + +- ReusedExchange (133) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Arguments: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(2) CometFilter +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(4) CometFilter +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = M)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#10)) + +(5) CometProject +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13], [cd_demo_sk#10, cd_dep_count#13] + +(6) CometBroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13] + +(7) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Right output [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#10], Inner, BuildRight + +(8) CometProject +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13], [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] + +(9) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(10) CometFilter +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(11) CometProject +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(12) CometBroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(13) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Right output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#14], Inner, BuildRight + +(14) CometProject +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(15) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(16) CometFilter +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(17) CometBroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(18) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Right output [1]: [cd_demo_sk#19] +Arguments: [c_current_cdemo_sk#15], [cd_demo_sk#19], Inner, BuildRight + +(19) CometProject +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] + +(20) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(21) CometFilter +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(22) CometBroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(23) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Right output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [c_current_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(24) CometProject +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(25) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24, d_year#25] + +(26) CometFilter +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#24)) + +(27) CometProject +Input [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24], [d_date_sk#24] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: [d_date_sk#24] + +(29) CometBroadcastHashJoin +Left output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [1]: [d_date_sk#24] +Arguments: [cs_sold_date_sk#9], [d_date_sk#24], Inner, BuildRight + +(30) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(31) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(32) CometFilter +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(33) CometBroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(34) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [cs_item_sk#3], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] +Arguments: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34], [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] + +(36) CometHashAggregate +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] + +(37) CometExchange +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(38) CometHashAggregate +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] + +(39) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] +Arguments: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] + +(40) CometFilter +Input [9]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] +Condition : ((isnotnull(cs_bill_cdemo_sk#50) AND isnotnull(cs_bill_customer_sk#49)) AND isnotnull(cs_item_sk#51)) + +(41) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#58, cd_dep_count#59] + +(42) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] +Right output [2]: [cd_demo_sk#58, cd_dep_count#59] +Arguments: [cs_bill_cdemo_sk#50], [cd_demo_sk#58], Inner, BuildRight + +(43) CometProject +Input [11]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_demo_sk#58, cd_dep_count#59] +Arguments: [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59], [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59] + +(44) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#60, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] + +(45) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59] +Right output [4]: [c_customer_sk#60, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] +Arguments: [cs_bill_customer_sk#49], [c_customer_sk#60], Inner, BuildRight + +(46) CometProject +Input [13]: [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_customer_sk#60, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] + +(47) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#64] + +(48) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] +Right output [1]: [cd_demo_sk#64] +Arguments: [c_current_cdemo_sk#61], [cd_demo_sk#64], Inner, BuildRight + +(49) CometProject +Input [12]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63, cd_demo_sk#64] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63] + +(50) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [ca_address_sk#65, ca_state#66, ca_country#67] + +(51) CometFilter +Input [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Condition : (ca_state#66 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#65)) + +(52) CometBroadcastExchange +Input [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [ca_address_sk#65, ca_state#66, ca_country#67] + +(53) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63] +Right output [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [c_current_addr_sk#62], [ca_address_sk#65], Inner, BuildRight + +(54) CometProject +Input [13]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63, ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] + +(55) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#68] + +(56) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] +Right output [1]: [d_date_sk#68] +Arguments: [cs_sold_date_sk#57], [d_date_sk#68], Inner, BuildRight + +(57) CometProject +Input [12]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67, d_date_sk#68] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] + +(58) ReusedExchange [Reuses operator id: 33] +Output [2]: [i_item_sk#69, i_item_id#70] + +(59) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] +Right output [2]: [i_item_sk#69, i_item_id#70] +Arguments: [cs_item_sk#51], [i_item_sk#69], Inner, BuildRight + +(60) CometProject +Input [12]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67, i_item_sk#69, i_item_id#70] +Arguments: [i_item_id#70, ca_country#67, ca_state#66, agg1#71, agg2#72, agg3#73, agg4#74, agg5#75, agg6#76, agg7#77], [i_item_id#70, ca_country#67, ca_state#66, cast(cs_quantity#52 as decimal(12,2)) AS agg1#71, cast(cs_list_price#53 as decimal(12,2)) AS agg2#72, cast(cs_coupon_amt#55 as decimal(12,2)) AS agg3#73, cast(cs_sales_price#54 as decimal(12,2)) AS agg4#74, cast(cs_net_profit#56 as decimal(12,2)) AS agg5#75, cast(c_birth_year#63 as decimal(12,2)) AS agg6#76, cast(cd_dep_count#59 as decimal(12,2)) AS agg7#77] + +(61) CometHashAggregate +Input [10]: [i_item_id#70, ca_country#67, ca_state#66, agg1#71, agg2#72, agg3#73, agg4#74, agg5#75, agg6#76, agg7#77] +Keys [3]: [i_item_id#70, ca_country#67, ca_state#66] +Functions [7]: [partial_avg(agg1#71), partial_avg(agg2#72), partial_avg(agg3#73), partial_avg(agg4#74), partial_avg(agg5#75), partial_avg(agg6#76), partial_avg(agg7#77)] + +(62) CometExchange +Input [17]: [i_item_id#70, ca_country#67, ca_state#66, sum#78, count#79, sum#80, count#81, sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89, sum#90, count#91] +Arguments: hashpartitioning(i_item_id#70, ca_country#67, ca_state#66, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(63) CometHashAggregate +Input [17]: [i_item_id#70, ca_country#67, ca_state#66, sum#78, count#79, sum#80, count#81, sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89, sum#90, count#91] +Keys [3]: [i_item_id#70, ca_country#67, ca_state#66] +Functions [7]: [avg(agg1#71), avg(agg2#72), avg(agg3#73), avg(agg4#74), avg(agg5#75), avg(agg6#76), avg(agg7#77)] + +(64) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] +Arguments: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] + +(65) CometFilter +Input [9]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] +Condition : ((isnotnull(cs_bill_cdemo_sk#93) AND isnotnull(cs_bill_customer_sk#92)) AND isnotnull(cs_item_sk#94)) + +(66) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#101, cd_dep_count#102] + +(67) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] +Right output [2]: [cd_demo_sk#101, cd_dep_count#102] +Arguments: [cs_bill_cdemo_sk#93], [cd_demo_sk#101], Inner, BuildRight + +(68) CometProject +Input [11]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_demo_sk#101, cd_dep_count#102] +Arguments: [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102], [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102] + +(69) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#103, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] + +(70) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102] +Right output [4]: [c_customer_sk#103, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] +Arguments: [cs_bill_customer_sk#92], [c_customer_sk#103], Inner, BuildRight + +(71) CometProject +Input [13]: [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_customer_sk#103, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] + +(72) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#107] + +(73) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] +Right output [1]: [cd_demo_sk#107] +Arguments: [c_current_cdemo_sk#104], [cd_demo_sk#107], Inner, BuildRight + +(74) CometProject +Input [12]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106, cd_demo_sk#107] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106] + +(75) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [3]: [ca_address_sk#108, ca_state#109, ca_country#110] +Arguments: [ca_address_sk#108, ca_state#109, ca_country#110] + +(76) CometFilter +Input [3]: [ca_address_sk#108, ca_state#109, ca_country#110] +Condition : (ca_state#109 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#108)) + +(77) CometProject +Input [3]: [ca_address_sk#108, ca_state#109, ca_country#110] +Arguments: [ca_address_sk#108, ca_country#110], [ca_address_sk#108, ca_country#110] + +(78) CometBroadcastExchange +Input [2]: [ca_address_sk#108, ca_country#110] +Arguments: [ca_address_sk#108, ca_country#110] + +(79) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106] +Right output [2]: [ca_address_sk#108, ca_country#110] +Arguments: [c_current_addr_sk#105], [ca_address_sk#108], Inner, BuildRight + +(80) CometProject +Input [12]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106, ca_address_sk#108, ca_country#110] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110] + +(81) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#111] + +(82) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110] +Right output [1]: [d_date_sk#111] +Arguments: [cs_sold_date_sk#100], [d_date_sk#111], Inner, BuildRight + +(83) CometProject +Input [11]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110, d_date_sk#111] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110] + +(84) ReusedExchange [Reuses operator id: 33] +Output [2]: [i_item_sk#112, i_item_id#113] + +(85) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110] +Right output [2]: [i_item_sk#112, i_item_id#113] +Arguments: [cs_item_sk#94], [i_item_sk#112], Inner, BuildRight + +(86) CometProject +Input [11]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110, i_item_sk#112, i_item_id#113] +Arguments: [i_item_id#113, ca_country#110, agg1#114, agg2#115, agg3#116, agg4#117, agg5#118, agg6#119, agg7#120], [i_item_id#113, ca_country#110, cast(cs_quantity#95 as decimal(12,2)) AS agg1#114, cast(cs_list_price#96 as decimal(12,2)) AS agg2#115, cast(cs_coupon_amt#98 as decimal(12,2)) AS agg3#116, cast(cs_sales_price#97 as decimal(12,2)) AS agg4#117, cast(cs_net_profit#99 as decimal(12,2)) AS agg5#118, cast(c_birth_year#106 as decimal(12,2)) AS agg6#119, cast(cd_dep_count#102 as decimal(12,2)) AS agg7#120] + +(87) CometHashAggregate +Input [9]: [i_item_id#113, ca_country#110, agg1#114, agg2#115, agg3#116, agg4#117, agg5#118, agg6#119, agg7#120] +Keys [2]: [i_item_id#113, ca_country#110] +Functions [7]: [partial_avg(agg1#114), partial_avg(agg2#115), partial_avg(agg3#116), partial_avg(agg4#117), partial_avg(agg5#118), partial_avg(agg6#119), partial_avg(agg7#120)] + +(88) CometExchange +Input [16]: [i_item_id#113, ca_country#110, sum#121, count#122, sum#123, count#124, sum#125, count#126, sum#127, count#128, sum#129, count#130, sum#131, count#132, sum#133, count#134] +Arguments: hashpartitioning(i_item_id#113, ca_country#110, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(89) CometHashAggregate +Input [16]: [i_item_id#113, ca_country#110, sum#121, count#122, sum#123, count#124, sum#125, count#126, sum#127, count#128, sum#129, count#130, sum#131, count#132, sum#133, count#134] +Keys [2]: [i_item_id#113, ca_country#110] +Functions [7]: [avg(agg1#114), avg(agg2#115), avg(agg3#116), avg(agg4#117), avg(agg5#118), avg(agg6#119), avg(agg7#120)] + +(90) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] +Arguments: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] + +(91) CometFilter +Input [9]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] +Condition : ((isnotnull(cs_bill_cdemo_sk#136) AND isnotnull(cs_bill_customer_sk#135)) AND isnotnull(cs_item_sk#137)) + +(92) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#144, cd_dep_count#145] + +(93) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] +Right output [2]: [cd_demo_sk#144, cd_dep_count#145] +Arguments: [cs_bill_cdemo_sk#136], [cd_demo_sk#144], Inner, BuildRight + +(94) CometProject +Input [11]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_demo_sk#144, cd_dep_count#145] +Arguments: [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145], [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145] + +(95) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#146, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] + +(96) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145] +Right output [4]: [c_customer_sk#146, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] +Arguments: [cs_bill_customer_sk#135], [c_customer_sk#146], Inner, BuildRight + +(97) CometProject +Input [13]: [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_customer_sk#146, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] + +(98) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#150] + +(99) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] +Right output [1]: [cd_demo_sk#150] +Arguments: [c_current_cdemo_sk#147], [cd_demo_sk#150], Inner, BuildRight + +(100) CometProject +Input [12]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149, cd_demo_sk#150] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149] + +(101) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#151, ca_state#152] +Arguments: [ca_address_sk#151, ca_state#152] + +(102) CometFilter +Input [2]: [ca_address_sk#151, ca_state#152] +Condition : (ca_state#152 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#151)) + +(103) CometProject +Input [2]: [ca_address_sk#151, ca_state#152] +Arguments: [ca_address_sk#151], [ca_address_sk#151] + +(104) CometBroadcastExchange +Input [1]: [ca_address_sk#151] +Arguments: [ca_address_sk#151] + +(105) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149] +Right output [1]: [ca_address_sk#151] +Arguments: [c_current_addr_sk#148], [ca_address_sk#151], Inner, BuildRight + +(106) CometProject +Input [11]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149, ca_address_sk#151] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149] + +(107) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#153] + +(108) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149] +Right output [1]: [d_date_sk#153] +Arguments: [cs_sold_date_sk#143], [d_date_sk#153], Inner, BuildRight + +(109) CometProject +Input [10]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149, d_date_sk#153] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149] + +(110) ReusedExchange [Reuses operator id: 33] +Output [2]: [i_item_sk#154, i_item_id#155] + +(111) CometBroadcastHashJoin +Left output [8]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149] +Right output [2]: [i_item_sk#154, i_item_id#155] +Arguments: [cs_item_sk#137], [i_item_sk#154], Inner, BuildRight + +(112) CometProject +Input [10]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149, i_item_sk#154, i_item_id#155] +Arguments: [i_item_id#155, agg1#156, agg2#157, agg3#158, agg4#159, agg5#160, agg6#161, agg7#162], [i_item_id#155, cast(cs_quantity#138 as decimal(12,2)) AS agg1#156, cast(cs_list_price#139 as decimal(12,2)) AS agg2#157, cast(cs_coupon_amt#141 as decimal(12,2)) AS agg3#158, cast(cs_sales_price#140 as decimal(12,2)) AS agg4#159, cast(cs_net_profit#142 as decimal(12,2)) AS agg5#160, cast(c_birth_year#149 as decimal(12,2)) AS agg6#161, cast(cd_dep_count#145 as decimal(12,2)) AS agg7#162] + +(113) CometHashAggregate +Input [8]: [i_item_id#155, agg1#156, agg2#157, agg3#158, agg4#159, agg5#160, agg6#161, agg7#162] +Keys [1]: [i_item_id#155] +Functions [7]: [partial_avg(agg1#156), partial_avg(agg2#157), partial_avg(agg3#158), partial_avg(agg4#159), partial_avg(agg5#160), partial_avg(agg6#161), partial_avg(agg7#162)] + +(114) CometExchange +Input [15]: [i_item_id#155, sum#163, count#164, sum#165, count#166, sum#167, count#168, sum#169, count#170, sum#171, count#172, sum#173, count#174, sum#175, count#176] +Arguments: hashpartitioning(i_item_id#155, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(115) CometHashAggregate +Input [15]: [i_item_id#155, sum#163, count#164, sum#165, count#166, sum#167, count#168, sum#169, count#170, sum#171, count#172, sum#173, count#174, sum#175, count#176] +Keys [1]: [i_item_id#155] +Functions [7]: [avg(agg1#156), avg(agg2#157), avg(agg3#158), avg(agg4#159), avg(agg5#160), avg(agg6#161), avg(agg7#162)] + +(116) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [9]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] +Arguments: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] + +(117) CometFilter +Input [9]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] +Condition : ((isnotnull(cs_bill_cdemo_sk#178) AND isnotnull(cs_bill_customer_sk#177)) AND isnotnull(cs_item_sk#179)) + +(118) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#186, cd_dep_count#187] + +(119) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] +Right output [2]: [cd_demo_sk#186, cd_dep_count#187] +Arguments: [cs_bill_cdemo_sk#178], [cd_demo_sk#186], Inner, BuildRight + +(120) CometProject +Input [11]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_demo_sk#186, cd_dep_count#187] +Arguments: [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187], [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187] + +(121) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#188, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] + +(122) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187] +Right output [4]: [c_customer_sk#188, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] +Arguments: [cs_bill_customer_sk#177], [c_customer_sk#188], Inner, BuildRight + +(123) CometProject +Input [13]: [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_customer_sk#188, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] + +(124) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#192] + +(125) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] +Right output [1]: [cd_demo_sk#192] +Arguments: [c_current_cdemo_sk#189], [cd_demo_sk#192], Inner, BuildRight + +(126) CometProject +Input [12]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191, cd_demo_sk#192] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191] + +(127) ReusedExchange [Reuses operator id: 104] +Output [1]: [ca_address_sk#193] + +(128) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191] +Right output [1]: [ca_address_sk#193] +Arguments: [c_current_addr_sk#190], [ca_address_sk#193], Inner, BuildRight + +(129) CometProject +Input [11]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191, ca_address_sk#193] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191] + +(130) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#194] + +(131) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191] +Right output [1]: [d_date_sk#194] +Arguments: [cs_sold_date_sk#185], [d_date_sk#194], Inner, BuildRight + +(132) CometProject +Input [10]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191, d_date_sk#194] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191] + +(133) ReusedExchange [Reuses operator id: 17] +Output [1]: [i_item_sk#195] + +(134) CometBroadcastHashJoin +Left output [8]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191] +Right output [1]: [i_item_sk#195] +Arguments: [cs_item_sk#179], [i_item_sk#195], Inner, BuildRight + +(135) CometProject +Input [9]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191, i_item_sk#195] +Arguments: [agg1#196, agg2#197, agg3#198, agg4#199, agg5#200, agg6#201, agg7#202], [cast(cs_quantity#180 as decimal(12,2)) AS agg1#196, cast(cs_list_price#181 as decimal(12,2)) AS agg2#197, cast(cs_coupon_amt#183 as decimal(12,2)) AS agg3#198, cast(cs_sales_price#182 as decimal(12,2)) AS agg4#199, cast(cs_net_profit#184 as decimal(12,2)) AS agg5#200, cast(c_birth_year#191 as decimal(12,2)) AS agg6#201, cast(cd_dep_count#187 as decimal(12,2)) AS agg7#202] + +(136) CometHashAggregate +Input [7]: [agg1#196, agg2#197, agg3#198, agg4#199, agg5#200, agg6#201, agg7#202] +Keys: [] +Functions [7]: [partial_avg(agg1#196), partial_avg(agg2#197), partial_avg(agg3#198), partial_avg(agg4#199), partial_avg(agg5#200), partial_avg(agg6#201), partial_avg(agg7#202)] + +(137) CometExchange +Input [14]: [sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(138) CometHashAggregate +Input [14]: [sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216] +Keys: [] +Functions [7]: [avg(agg1#196), avg(agg2#197), avg(agg3#198), avg(agg4#199), avg(agg5#200), avg(agg6#201), avg(agg7#202)] + +(139) CometUnion +Child 0 Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] +Child 1 Input [11]: [i_item_id#70, ca_country#67, ca_state#66, county#224, agg1#225, agg2#226, agg3#227, agg4#228, agg5#229, agg6#230, agg7#231] +Child 2 Input [11]: [i_item_id#113, ca_country#110, ca_state#232, county#233, agg1#234, agg2#235, agg3#236, agg4#237, agg5#238, agg6#239, agg7#240] +Child 3 Input [11]: [i_item_id#155, ca_country#241, ca_state#242, county#243, agg1#244, agg2#245, agg3#246, agg4#247, agg5#248, agg6#249, agg7#250] +Child 4 Input [11]: [i_item_id#251, ca_country#252, ca_state#253, county#254, agg1#255, agg2#256, agg3#257, agg4#258, agg5#259, agg6#260, agg7#261] + +(140) CometTakeOrderedAndProject +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_country#23 ASC NULLS FIRST,ca_state#22 ASC NULLS FIRST,ca_county#21 ASC NULLS FIRST,i_item_id#27 ASC NULLS FIRST], output=[i_item_id#27,ca_country#23,ca_state#22,ca_county#21,agg1#217,agg2#218,agg3#219,agg4#220,agg5#221,agg6#222,agg7#223]), [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223], 100, [ca_country#23 ASC NULLS FIRST, ca_state#22 ASC NULLS FIRST, ca_county#21 ASC NULLS FIRST, i_item_id#27 ASC NULLS FIRST], [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] + +(141) ColumnarToRow [codegen id : 1] +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e4f78115a8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_datafusion/simplified.txt @@ -0,0 +1,143 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometUnion [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id,ca_country,ca_state,ca_county] #1 + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_county,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk,cd_dep_count] #2 + CometProject [cd_demo_sk,cd_dep_count] + CometFilter [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + CometProject [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometBroadcastExchange [cd_demo_sk] #4 + CometFilter [cd_demo_sk] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state,ca_country] #5 + CometFilter [ca_address_sk,ca_county,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_county,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #7 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id,ca_country,ca_state] #8 + CometHashAggregate [i_item_id,ca_country,ca_state,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,ca_country,ca_state,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + CometBroadcastExchange [ca_address_sk,ca_state,ca_country] #9 + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk,i_item_id] #7 + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id,ca_country] #10 + CometHashAggregate [i_item_id,ca_country,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,ca_country,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + CometBroadcastExchange [ca_address_sk,ca_country] #11 + CometProject [ca_address_sk,ca_country] + CometFilter [ca_address_sk,ca_state,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_country] + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk,i_item_id] #7 + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id] #12 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + CometBroadcastExchange [ca_address_sk] #13 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk,i_item_id] #7 + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange #14 + CometHashAggregate [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + ReusedExchange [ca_address_sk] #13 + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..eb97a02b1d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/explain.txt @@ -0,0 +1,790 @@ +== Physical Plan == +* ColumnarToRow (143) ++- CometTakeOrderedAndProject (142) + +- CometUnion (141) + :- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometProject (35) + : +- CometBroadcastHashJoin (34) + : :- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometProject (24) + : : : +- CometBroadcastHashJoin (23) + : : : :- CometProject (19) + : : : : +- CometBroadcastHashJoin (18) + : : : : :- CometProject (14) + : : : : : +- CometBroadcastHashJoin (13) + : : : : : :- CometProject (8) + : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : :- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : : : +- CometBroadcastExchange (6) + : : : : : : +- CometProject (5) + : : : : : : +- CometFilter (4) + : : : : : : +- CometScan parquet spark_catalog.default.customer_demographics (3) + : : : : : +- CometBroadcastExchange (12) + : : : : : +- CometProject (11) + : : : : : +- CometFilter (10) + : : : : : +- CometScan parquet spark_catalog.default.customer (9) + : : : : +- CometBroadcastExchange (17) + : : : : +- CometFilter (16) + : : : : +- CometScan parquet spark_catalog.default.customer_demographics (15) + : : : +- CometBroadcastExchange (22) + : : : +- CometFilter (21) + : : : +- CometScan parquet spark_catalog.default.customer_address (20) + : : +- CometBroadcastExchange (28) + : : +- CometProject (27) + : : +- CometFilter (26) + : : +- CometScan parquet spark_catalog.default.date_dim (25) + : +- CometBroadcastExchange (33) + : +- CometFilter (32) + : +- CometScan parquet spark_catalog.default.item (31) + :- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (57) + : : +- CometBroadcastHashJoin (56) + : : :- CometProject (54) + : : : +- CometBroadcastHashJoin (53) + : : : :- CometProject (49) + : : : : +- CometBroadcastHashJoin (48) + : : : : :- CometProject (46) + : : : : : +- CometBroadcastHashJoin (45) + : : : : : :- CometProject (43) + : : : : : : +- CometBroadcastHashJoin (42) + : : : : : : :- CometFilter (40) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (39) + : : : : : : +- ReusedExchange (41) + : : : : : +- ReusedExchange (44) + : : : : +- ReusedExchange (47) + : : : +- CometBroadcastExchange (52) + : : : +- CometFilter (51) + : : : +- CometScan parquet spark_catalog.default.customer_address (50) + : : +- ReusedExchange (55) + : +- ReusedExchange (58) + :- CometHashAggregate (89) + : +- CometExchange (88) + : +- CometHashAggregate (87) + : +- CometProject (86) + : +- CometBroadcastHashJoin (85) + : :- CometProject (83) + : : +- CometBroadcastHashJoin (82) + : : :- CometProject (80) + : : : +- CometBroadcastHashJoin (79) + : : : :- CometProject (74) + : : : : +- CometBroadcastHashJoin (73) + : : : : :- CometProject (71) + : : : : : +- CometBroadcastHashJoin (70) + : : : : : :- CometProject (68) + : : : : : : +- CometBroadcastHashJoin (67) + : : : : : : :- CometFilter (65) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (64) + : : : : : : +- ReusedExchange (66) + : : : : : +- ReusedExchange (69) + : : : : +- ReusedExchange (72) + : : : +- CometBroadcastExchange (78) + : : : +- CometProject (77) + : : : +- CometFilter (76) + : : : +- CometScan parquet spark_catalog.default.customer_address (75) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + :- CometHashAggregate (115) + : +- CometExchange (114) + : +- CometHashAggregate (113) + : +- CometProject (112) + : +- CometBroadcastHashJoin (111) + : :- CometProject (109) + : : +- CometBroadcastHashJoin (108) + : : :- CometProject (106) + : : : +- CometBroadcastHashJoin (105) + : : : :- CometProject (100) + : : : : +- CometBroadcastHashJoin (99) + : : : : :- CometProject (97) + : : : : : +- CometBroadcastHashJoin (96) + : : : : : :- CometProject (94) + : : : : : : +- CometBroadcastHashJoin (93) + : : : : : : :- CometFilter (91) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (90) + : : : : : : +- ReusedExchange (92) + : : : : : +- ReusedExchange (95) + : : : : +- ReusedExchange (98) + : : : +- CometBroadcastExchange (104) + : : : +- CometProject (103) + : : : +- CometFilter (102) + : : : +- CometScan parquet spark_catalog.default.customer_address (101) + : : +- ReusedExchange (107) + : +- ReusedExchange (110) + +- CometHashAggregate (140) + +- CometExchange (139) + +- CometHashAggregate (138) + +- CometProject (137) + +- CometBroadcastHashJoin (136) + :- CometProject (132) + : +- CometBroadcastHashJoin (131) + : :- CometProject (129) + : : +- CometBroadcastHashJoin (128) + : : :- CometProject (126) + : : : +- CometBroadcastHashJoin (125) + : : : :- CometProject (123) + : : : : +- CometBroadcastHashJoin (122) + : : : : :- CometProject (120) + : : : : : +- CometBroadcastHashJoin (119) + : : : : : :- CometFilter (117) + : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (116) + : : : : : +- ReusedExchange (118) + : : : : +- ReusedExchange (121) + : : : +- ReusedExchange (124) + : : +- ReusedExchange (127) + : +- ReusedExchange (130) + +- CometBroadcastExchange (135) + +- CometFilter (134) + +- CometScan parquet spark_catalog.default.item (133) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(3) CometScan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = M)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#10)) + +(5) CometProject +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13], [cd_demo_sk#10, cd_dep_count#13] + +(6) CometBroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cd_demo_sk#10, cd_dep_count#13] + +(7) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Right output [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#10], Inner, BuildRight + +(8) CometProject +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] +Arguments: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13], [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] + +(9) CometScan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(10) CometFilter +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(11) CometProject +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(12) CometBroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(13) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Right output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_bill_customer_sk#1], [c_customer_sk#14], Inner, BuildRight + +(14) CometProject +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(15) CometScan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(17) CometBroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: [cd_demo_sk#19] + +(18) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Right output [1]: [cd_demo_sk#19] +Arguments: [c_current_cdemo_sk#15], [cd_demo_sk#19], Inner, BuildRight + +(19) CometProject +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] + +(20) CometScan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(21) CometFilter +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(22) CometBroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(23) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Right output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [c_current_addr_sk#16], [ca_address_sk#20], Inner, BuildRight + +(24) CometProject +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(25) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#24, d_year#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(26) CometFilter +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#24)) + +(27) CometProject +Input [2]: [d_date_sk#24, d_year#25] +Arguments: [d_date_sk#24], [d_date_sk#24] + +(28) CometBroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: [d_date_sk#24] + +(29) CometBroadcastHashJoin +Left output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [1]: [d_date_sk#24] +Arguments: [cs_sold_date_sk#9], [d_date_sk#24], Inner, BuildRight + +(30) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] +Arguments: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23], [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] + +(31) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#26, i_item_id#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(32) CometFilter +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(33) CometBroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: [i_item_sk#26, i_item_id#27] + +(34) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Right output [2]: [i_item_sk#26, i_item_id#27] +Arguments: [cs_item_sk#3], [i_item_sk#26], Inner, BuildRight + +(35) CometProject +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] +Arguments: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34], [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] + +(36) CometHashAggregate +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] + +(37) CometExchange +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(38) CometHashAggregate +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] + +(39) CometScan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#57)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(40) CometFilter +Input [9]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] +Condition : ((isnotnull(cs_bill_cdemo_sk#50) AND isnotnull(cs_bill_customer_sk#49)) AND isnotnull(cs_item_sk#51)) + +(41) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#58, cd_dep_count#59] + +(42) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57] +Right output [2]: [cd_demo_sk#58, cd_dep_count#59] +Arguments: [cs_bill_cdemo_sk#50], [cd_demo_sk#58], Inner, BuildRight + +(43) CometProject +Input [11]: [cs_bill_customer_sk#49, cs_bill_cdemo_sk#50, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_demo_sk#58, cd_dep_count#59] +Arguments: [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59], [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59] + +(44) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#60, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] + +(45) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59] +Right output [4]: [c_customer_sk#60, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] +Arguments: [cs_bill_customer_sk#49], [c_customer_sk#60], Inner, BuildRight + +(46) CometProject +Input [13]: [cs_bill_customer_sk#49, cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_customer_sk#60, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] + +(47) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#64] + +(48) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63] +Right output [1]: [cd_demo_sk#64] +Arguments: [c_current_cdemo_sk#61], [cd_demo_sk#64], Inner, BuildRight + +(49) CometProject +Input [12]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_cdemo_sk#61, c_current_addr_sk#62, c_birth_year#63, cd_demo_sk#64] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63] + +(50) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(51) CometFilter +Input [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Condition : (ca_state#66 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#65)) + +(52) CometBroadcastExchange +Input [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [ca_address_sk#65, ca_state#66, ca_country#67] + +(53) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63] +Right output [3]: [ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [c_current_addr_sk#62], [ca_address_sk#65], Inner, BuildRight + +(54) CometProject +Input [13]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_current_addr_sk#62, c_birth_year#63, ca_address_sk#65, ca_state#66, ca_country#67] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] + +(55) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#68] + +(56) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] +Right output [1]: [d_date_sk#68] +Arguments: [cs_sold_date_sk#57], [d_date_sk#68], Inner, BuildRight + +(57) CometProject +Input [12]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cs_sold_date_sk#57, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67, d_date_sk#68] +Arguments: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67], [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] + +(58) ReusedExchange [Reuses operator id: 33] +Output [2]: [i_item_sk#69, i_item_id#70] + +(59) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67] +Right output [2]: [i_item_sk#69, i_item_id#70] +Arguments: [cs_item_sk#51], [i_item_sk#69], Inner, BuildRight + +(60) CometProject +Input [12]: [cs_item_sk#51, cs_quantity#52, cs_list_price#53, cs_sales_price#54, cs_coupon_amt#55, cs_net_profit#56, cd_dep_count#59, c_birth_year#63, ca_state#66, ca_country#67, i_item_sk#69, i_item_id#70] +Arguments: [i_item_id#70, ca_country#67, ca_state#66, agg1#71, agg2#72, agg3#73, agg4#74, agg5#75, agg6#76, agg7#77], [i_item_id#70, ca_country#67, ca_state#66, cast(cs_quantity#52 as decimal(12,2)) AS agg1#71, cast(cs_list_price#53 as decimal(12,2)) AS agg2#72, cast(cs_coupon_amt#55 as decimal(12,2)) AS agg3#73, cast(cs_sales_price#54 as decimal(12,2)) AS agg4#74, cast(cs_net_profit#56 as decimal(12,2)) AS agg5#75, cast(c_birth_year#63 as decimal(12,2)) AS agg6#76, cast(cd_dep_count#59 as decimal(12,2)) AS agg7#77] + +(61) CometHashAggregate +Input [10]: [i_item_id#70, ca_country#67, ca_state#66, agg1#71, agg2#72, agg3#73, agg4#74, agg5#75, agg6#76, agg7#77] +Keys [3]: [i_item_id#70, ca_country#67, ca_state#66] +Functions [7]: [partial_avg(agg1#71), partial_avg(agg2#72), partial_avg(agg3#73), partial_avg(agg4#74), partial_avg(agg5#75), partial_avg(agg6#76), partial_avg(agg7#77)] + +(62) CometExchange +Input [17]: [i_item_id#70, ca_country#67, ca_state#66, sum#78, count#79, sum#80, count#81, sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89, sum#90, count#91] +Arguments: hashpartitioning(i_item_id#70, ca_country#67, ca_state#66, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(63) CometHashAggregate +Input [17]: [i_item_id#70, ca_country#67, ca_state#66, sum#78, count#79, sum#80, count#81, sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89, sum#90, count#91] +Keys [3]: [i_item_id#70, ca_country#67, ca_state#66] +Functions [7]: [avg(agg1#71), avg(agg2#72), avg(agg3#73), avg(agg4#74), avg(agg5#75), avg(agg6#76), avg(agg7#77)] + +(64) CometScan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#100)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(65) CometFilter +Input [9]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] +Condition : ((isnotnull(cs_bill_cdemo_sk#93) AND isnotnull(cs_bill_customer_sk#92)) AND isnotnull(cs_item_sk#94)) + +(66) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#101, cd_dep_count#102] + +(67) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100] +Right output [2]: [cd_demo_sk#101, cd_dep_count#102] +Arguments: [cs_bill_cdemo_sk#93], [cd_demo_sk#101], Inner, BuildRight + +(68) CometProject +Input [11]: [cs_bill_customer_sk#92, cs_bill_cdemo_sk#93, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_demo_sk#101, cd_dep_count#102] +Arguments: [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102], [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102] + +(69) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#103, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] + +(70) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102] +Right output [4]: [c_customer_sk#103, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] +Arguments: [cs_bill_customer_sk#92], [c_customer_sk#103], Inner, BuildRight + +(71) CometProject +Input [13]: [cs_bill_customer_sk#92, cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_customer_sk#103, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] + +(72) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#107] + +(73) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106] +Right output [1]: [cd_demo_sk#107] +Arguments: [c_current_cdemo_sk#104], [cd_demo_sk#107], Inner, BuildRight + +(74) CometProject +Input [12]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_cdemo_sk#104, c_current_addr_sk#105, c_birth_year#106, cd_demo_sk#107] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106] + +(75) CometScan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#108, ca_state#109, ca_country#110] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(76) CometFilter +Input [3]: [ca_address_sk#108, ca_state#109, ca_country#110] +Condition : (ca_state#109 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#108)) + +(77) CometProject +Input [3]: [ca_address_sk#108, ca_state#109, ca_country#110] +Arguments: [ca_address_sk#108, ca_country#110], [ca_address_sk#108, ca_country#110] + +(78) CometBroadcastExchange +Input [2]: [ca_address_sk#108, ca_country#110] +Arguments: [ca_address_sk#108, ca_country#110] + +(79) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106] +Right output [2]: [ca_address_sk#108, ca_country#110] +Arguments: [c_current_addr_sk#105], [ca_address_sk#108], Inner, BuildRight + +(80) CometProject +Input [12]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_current_addr_sk#105, c_birth_year#106, ca_address_sk#108, ca_country#110] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110] + +(81) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#111] + +(82) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110] +Right output [1]: [d_date_sk#111] +Arguments: [cs_sold_date_sk#100], [d_date_sk#111], Inner, BuildRight + +(83) CometProject +Input [11]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cs_sold_date_sk#100, cd_dep_count#102, c_birth_year#106, ca_country#110, d_date_sk#111] +Arguments: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110], [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110] + +(84) ReusedExchange [Reuses operator id: 33] +Output [2]: [i_item_sk#112, i_item_id#113] + +(85) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110] +Right output [2]: [i_item_sk#112, i_item_id#113] +Arguments: [cs_item_sk#94], [i_item_sk#112], Inner, BuildRight + +(86) CometProject +Input [11]: [cs_item_sk#94, cs_quantity#95, cs_list_price#96, cs_sales_price#97, cs_coupon_amt#98, cs_net_profit#99, cd_dep_count#102, c_birth_year#106, ca_country#110, i_item_sk#112, i_item_id#113] +Arguments: [i_item_id#113, ca_country#110, agg1#114, agg2#115, agg3#116, agg4#117, agg5#118, agg6#119, agg7#120], [i_item_id#113, ca_country#110, cast(cs_quantity#95 as decimal(12,2)) AS agg1#114, cast(cs_list_price#96 as decimal(12,2)) AS agg2#115, cast(cs_coupon_amt#98 as decimal(12,2)) AS agg3#116, cast(cs_sales_price#97 as decimal(12,2)) AS agg4#117, cast(cs_net_profit#99 as decimal(12,2)) AS agg5#118, cast(c_birth_year#106 as decimal(12,2)) AS agg6#119, cast(cd_dep_count#102 as decimal(12,2)) AS agg7#120] + +(87) CometHashAggregate +Input [9]: [i_item_id#113, ca_country#110, agg1#114, agg2#115, agg3#116, agg4#117, agg5#118, agg6#119, agg7#120] +Keys [2]: [i_item_id#113, ca_country#110] +Functions [7]: [partial_avg(agg1#114), partial_avg(agg2#115), partial_avg(agg3#116), partial_avg(agg4#117), partial_avg(agg5#118), partial_avg(agg6#119), partial_avg(agg7#120)] + +(88) CometExchange +Input [16]: [i_item_id#113, ca_country#110, sum#121, count#122, sum#123, count#124, sum#125, count#126, sum#127, count#128, sum#129, count#130, sum#131, count#132, sum#133, count#134] +Arguments: hashpartitioning(i_item_id#113, ca_country#110, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(89) CometHashAggregate +Input [16]: [i_item_id#113, ca_country#110, sum#121, count#122, sum#123, count#124, sum#125, count#126, sum#127, count#128, sum#129, count#130, sum#131, count#132, sum#133, count#134] +Keys [2]: [i_item_id#113, ca_country#110] +Functions [7]: [avg(agg1#114), avg(agg2#115), avg(agg3#116), avg(agg4#117), avg(agg5#118), avg(agg6#119), avg(agg7#120)] + +(90) CometScan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#143)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(91) CometFilter +Input [9]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] +Condition : ((isnotnull(cs_bill_cdemo_sk#136) AND isnotnull(cs_bill_customer_sk#135)) AND isnotnull(cs_item_sk#137)) + +(92) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#144, cd_dep_count#145] + +(93) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143] +Right output [2]: [cd_demo_sk#144, cd_dep_count#145] +Arguments: [cs_bill_cdemo_sk#136], [cd_demo_sk#144], Inner, BuildRight + +(94) CometProject +Input [11]: [cs_bill_customer_sk#135, cs_bill_cdemo_sk#136, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_demo_sk#144, cd_dep_count#145] +Arguments: [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145], [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145] + +(95) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#146, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] + +(96) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145] +Right output [4]: [c_customer_sk#146, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] +Arguments: [cs_bill_customer_sk#135], [c_customer_sk#146], Inner, BuildRight + +(97) CometProject +Input [13]: [cs_bill_customer_sk#135, cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_customer_sk#146, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] + +(98) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#150] + +(99) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149] +Right output [1]: [cd_demo_sk#150] +Arguments: [c_current_cdemo_sk#147], [cd_demo_sk#150], Inner, BuildRight + +(100) CometProject +Input [12]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_cdemo_sk#147, c_current_addr_sk#148, c_birth_year#149, cd_demo_sk#150] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149] + +(101) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#151, ca_state#152] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(102) CometFilter +Input [2]: [ca_address_sk#151, ca_state#152] +Condition : (ca_state#152 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#151)) + +(103) CometProject +Input [2]: [ca_address_sk#151, ca_state#152] +Arguments: [ca_address_sk#151], [ca_address_sk#151] + +(104) CometBroadcastExchange +Input [1]: [ca_address_sk#151] +Arguments: [ca_address_sk#151] + +(105) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149] +Right output [1]: [ca_address_sk#151] +Arguments: [c_current_addr_sk#148], [ca_address_sk#151], Inner, BuildRight + +(106) CometProject +Input [11]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_current_addr_sk#148, c_birth_year#149, ca_address_sk#151] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149] + +(107) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#153] + +(108) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149] +Right output [1]: [d_date_sk#153] +Arguments: [cs_sold_date_sk#143], [d_date_sk#153], Inner, BuildRight + +(109) CometProject +Input [10]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cs_sold_date_sk#143, cd_dep_count#145, c_birth_year#149, d_date_sk#153] +Arguments: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149], [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149] + +(110) ReusedExchange [Reuses operator id: 33] +Output [2]: [i_item_sk#154, i_item_id#155] + +(111) CometBroadcastHashJoin +Left output [8]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149] +Right output [2]: [i_item_sk#154, i_item_id#155] +Arguments: [cs_item_sk#137], [i_item_sk#154], Inner, BuildRight + +(112) CometProject +Input [10]: [cs_item_sk#137, cs_quantity#138, cs_list_price#139, cs_sales_price#140, cs_coupon_amt#141, cs_net_profit#142, cd_dep_count#145, c_birth_year#149, i_item_sk#154, i_item_id#155] +Arguments: [i_item_id#155, agg1#156, agg2#157, agg3#158, agg4#159, agg5#160, agg6#161, agg7#162], [i_item_id#155, cast(cs_quantity#138 as decimal(12,2)) AS agg1#156, cast(cs_list_price#139 as decimal(12,2)) AS agg2#157, cast(cs_coupon_amt#141 as decimal(12,2)) AS agg3#158, cast(cs_sales_price#140 as decimal(12,2)) AS agg4#159, cast(cs_net_profit#142 as decimal(12,2)) AS agg5#160, cast(c_birth_year#149 as decimal(12,2)) AS agg6#161, cast(cd_dep_count#145 as decimal(12,2)) AS agg7#162] + +(113) CometHashAggregate +Input [8]: [i_item_id#155, agg1#156, agg2#157, agg3#158, agg4#159, agg5#160, agg6#161, agg7#162] +Keys [1]: [i_item_id#155] +Functions [7]: [partial_avg(agg1#156), partial_avg(agg2#157), partial_avg(agg3#158), partial_avg(agg4#159), partial_avg(agg5#160), partial_avg(agg6#161), partial_avg(agg7#162)] + +(114) CometExchange +Input [15]: [i_item_id#155, sum#163, count#164, sum#165, count#166, sum#167, count#168, sum#169, count#170, sum#171, count#172, sum#173, count#174, sum#175, count#176] +Arguments: hashpartitioning(i_item_id#155, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(115) CometHashAggregate +Input [15]: [i_item_id#155, sum#163, count#164, sum#165, count#166, sum#167, count#168, sum#169, count#170, sum#171, count#172, sum#173, count#174, sum#175, count#176] +Keys [1]: [i_item_id#155] +Functions [7]: [avg(agg1#156), avg(agg2#157), avg(agg3#158), avg(agg4#159), avg(agg5#160), avg(agg6#161), avg(agg7#162)] + +(116) CometScan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#185)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(117) CometFilter +Input [9]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] +Condition : ((isnotnull(cs_bill_cdemo_sk#178) AND isnotnull(cs_bill_customer_sk#177)) AND isnotnull(cs_item_sk#179)) + +(118) ReusedExchange [Reuses operator id: 6] +Output [2]: [cd_demo_sk#186, cd_dep_count#187] + +(119) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185] +Right output [2]: [cd_demo_sk#186, cd_dep_count#187] +Arguments: [cs_bill_cdemo_sk#178], [cd_demo_sk#186], Inner, BuildRight + +(120) CometProject +Input [11]: [cs_bill_customer_sk#177, cs_bill_cdemo_sk#178, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_demo_sk#186, cd_dep_count#187] +Arguments: [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187], [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187] + +(121) ReusedExchange [Reuses operator id: 12] +Output [4]: [c_customer_sk#188, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] + +(122) CometBroadcastHashJoin +Left output [9]: [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187] +Right output [4]: [c_customer_sk#188, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] +Arguments: [cs_bill_customer_sk#177], [c_customer_sk#188], Inner, BuildRight + +(123) CometProject +Input [13]: [cs_bill_customer_sk#177, cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_customer_sk#188, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] + +(124) ReusedExchange [Reuses operator id: 17] +Output [1]: [cd_demo_sk#192] + +(125) CometBroadcastHashJoin +Left output [11]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191] +Right output [1]: [cd_demo_sk#192] +Arguments: [c_current_cdemo_sk#189], [cd_demo_sk#192], Inner, BuildRight + +(126) CometProject +Input [12]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_cdemo_sk#189, c_current_addr_sk#190, c_birth_year#191, cd_demo_sk#192] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191] + +(127) ReusedExchange [Reuses operator id: 104] +Output [1]: [ca_address_sk#193] + +(128) CometBroadcastHashJoin +Left output [10]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191] +Right output [1]: [ca_address_sk#193] +Arguments: [c_current_addr_sk#190], [ca_address_sk#193], Inner, BuildRight + +(129) CometProject +Input [11]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_current_addr_sk#190, c_birth_year#191, ca_address_sk#193] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191] + +(130) ReusedExchange [Reuses operator id: 28] +Output [1]: [d_date_sk#194] + +(131) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191] +Right output [1]: [d_date_sk#194] +Arguments: [cs_sold_date_sk#185], [d_date_sk#194], Inner, BuildRight + +(132) CometProject +Input [10]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cs_sold_date_sk#185, cd_dep_count#187, c_birth_year#191, d_date_sk#194] +Arguments: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191], [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191] + +(133) CometScan parquet spark_catalog.default.item +Output [1]: [i_item_sk#195] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(134) CometFilter +Input [1]: [i_item_sk#195] +Condition : isnotnull(i_item_sk#195) + +(135) CometBroadcastExchange +Input [1]: [i_item_sk#195] +Arguments: [i_item_sk#195] + +(136) CometBroadcastHashJoin +Left output [8]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191] +Right output [1]: [i_item_sk#195] +Arguments: [cs_item_sk#179], [i_item_sk#195], Inner, BuildRight + +(137) CometProject +Input [9]: [cs_item_sk#179, cs_quantity#180, cs_list_price#181, cs_sales_price#182, cs_coupon_amt#183, cs_net_profit#184, cd_dep_count#187, c_birth_year#191, i_item_sk#195] +Arguments: [agg1#196, agg2#197, agg3#198, agg4#199, agg5#200, agg6#201, agg7#202], [cast(cs_quantity#180 as decimal(12,2)) AS agg1#196, cast(cs_list_price#181 as decimal(12,2)) AS agg2#197, cast(cs_coupon_amt#183 as decimal(12,2)) AS agg3#198, cast(cs_sales_price#182 as decimal(12,2)) AS agg4#199, cast(cs_net_profit#184 as decimal(12,2)) AS agg5#200, cast(c_birth_year#191 as decimal(12,2)) AS agg6#201, cast(cd_dep_count#187 as decimal(12,2)) AS agg7#202] + +(138) CometHashAggregate +Input [7]: [agg1#196, agg2#197, agg3#198, agg4#199, agg5#200, agg6#201, agg7#202] +Keys: [] +Functions [7]: [partial_avg(agg1#196), partial_avg(agg2#197), partial_avg(agg3#198), partial_avg(agg4#199), partial_avg(agg5#200), partial_avg(agg6#201), partial_avg(agg7#202)] + +(139) CometExchange +Input [14]: [sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(140) CometHashAggregate +Input [14]: [sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216] +Keys: [] +Functions [7]: [avg(agg1#196), avg(agg2#197), avg(agg3#198), avg(agg4#199), avg(agg5#200), avg(agg6#201), avg(agg7#202)] + +(141) CometUnion +Child 0 Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] +Child 1 Input [11]: [i_item_id#70, ca_country#67, ca_state#66, county#224, agg1#225, agg2#226, agg3#227, agg4#228, agg5#229, agg6#230, agg7#231] +Child 2 Input [11]: [i_item_id#113, ca_country#110, ca_state#232, county#233, agg1#234, agg2#235, agg3#236, agg4#237, agg5#238, agg6#239, agg7#240] +Child 3 Input [11]: [i_item_id#155, ca_country#241, ca_state#242, county#243, agg1#244, agg2#245, agg3#246, agg4#247, agg5#248, agg6#249, agg7#250] +Child 4 Input [11]: [i_item_id#251, ca_country#252, ca_state#253, county#254, agg1#255, agg2#256, agg3#257, agg4#258, agg5#259, agg6#260, agg7#261] + +(142) CometTakeOrderedAndProject +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_country#23 ASC NULLS FIRST,ca_state#22 ASC NULLS FIRST,ca_county#21 ASC NULLS FIRST,i_item_id#27 ASC NULLS FIRST], output=[i_item_id#27,ca_country#23,ca_state#22,ca_county#21,agg1#217,agg2#218,agg3#219,agg4#220,agg5#221,agg6#222,agg7#223]), [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223], 100, [ca_country#23 ASC NULLS FIRST, ca_state#22 ASC NULLS FIRST, ca_county#21 ASC NULLS FIRST, i_item_id#27 ASC NULLS FIRST], [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] + +(143) ColumnarToRow [codegen id : 1] +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#217, agg2#218, agg3#219, agg4#220, agg5#221, agg6#222, agg7#223] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a7639e0822 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q18a.native_iceberg_compat/simplified.txt @@ -0,0 +1,145 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometUnion [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id,ca_country,ca_state,ca_county] #1 + CometHashAggregate [i_item_id,ca_country,ca_state,ca_county,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_county,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometBroadcastExchange [cd_demo_sk,cd_dep_count] #2 + CometProject [cd_demo_sk,cd_dep_count] + CometFilter [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + CometProject [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + CometBroadcastExchange [cd_demo_sk] #4 + CometFilter [cd_demo_sk] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + CometBroadcastExchange [ca_address_sk,ca_county,ca_state,ca_country] #5 + CometFilter [ca_address_sk,ca_county,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_item_id] #7 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id,ca_country,ca_state] #8 + CometHashAggregate [i_item_id,ca_country,ca_state,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,ca_country,ca_state,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_state,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_state,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_state,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_state,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + CometBroadcastExchange [ca_address_sk,ca_state,ca_country] #9 + CometFilter [ca_address_sk,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk,i_item_id] #7 + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id,ca_country] #10 + CometHashAggregate [i_item_id,ca_country,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,ca_country,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_country,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_country,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_country] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk,ca_country] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + CometBroadcastExchange [ca_address_sk,ca_country] #11 + CometProject [ca_address_sk,ca_country] + CometFilter [ca_address_sk,ca_state,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk,i_item_id] #7 + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange [i_item_id] #12 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_sk,i_item_id] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + CometBroadcastExchange [ca_address_sk] #13 + CometProject [ca_address_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + ReusedExchange [d_date_sk] #6 + ReusedExchange [i_item_sk,i_item_id] #7 + CometHashAggregate [i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7)] + CometExchange #14 + CometHashAggregate [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometProject [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [agg1,agg2,agg3,agg4,agg5,agg6,agg7] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year,ca_address_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year,cd_demo_sk] + CometProject [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_demo_sk,cd_dep_count] + CometFilter [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + ReusedExchange [cd_demo_sk] #4 + ReusedExchange [ca_address_sk] #13 + ReusedExchange [d_date_sk] #6 + CometBroadcastExchange [i_item_sk] #15 + CometFilter [i_item_sk] + CometScan parquet spark_catalog.default.item [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/explain.txt new file mode 100644 index 0000000000..48efd7d6ca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/explain.txt @@ -0,0 +1,116 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Arguments: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [cs_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] + +(20) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5fc5ba1ef8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_datafusion/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0] + CometExchange [i_class] #1 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ee4b1b3a3c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/explain.txt @@ -0,0 +1,126 @@ +== Physical Plan == +TakeOrderedAndProject (22) ++- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.item (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometScan parquet spark_catalog.default.date_dim (8) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [cs_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] + +(20) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, _we0#15] + +(22) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..dd437e40f4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q20.native_iceberg_compat/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (2) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0] + CometExchange [i_class] #1 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,sum,sum(UnscaledValue(cs_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,cs_ext_sales_price] + CometProject [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #3 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/explain.txt new file mode 100644 index 0000000000..eefb6710c5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/explain.txt @@ -0,0 +1,128 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Expand (20) + +- * Project (19) + +- * BroadcastNestedLoopJoin Inner BuildRight (18) + :- * ColumnarToRow (14) + : +- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + +- BroadcastExchange (17) + +- * ColumnarToRow (16) + +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (15) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Arguments: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] + +(2) CometFilter +Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Condition : isnotnull(inv_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [inv_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3, d_date_sk#4] +Arguments: [inv_item_sk#1, inv_quantity_on_hand#2], [inv_item_sk#1, inv_quantity_on_hand#2] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(10) CometFilter +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(12) CometBroadcastHashJoin +Left output [2]: [inv_item_sk#1, inv_quantity_on_hand#2] +Right output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [inv_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#2, i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10], [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(14) ColumnarToRow [codegen id : 2] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(15) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output: [] + +(16) ColumnarToRow [codegen id : 1] +Input: [] + +(17) BroadcastExchange +Input: [] +Arguments: IdentityBroadcastMode, [plan_id=1] + +(18) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 2] +Output [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(20) Expand [codegen id : 2] +Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] + +(21) HashAggregate [codegen id : 2] +Input [6]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [partial_avg(inv_quantity_on_hand#2)] +Aggregate Attributes [2]: [sum#16, count#17] +Results [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] + +(22) Exchange +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(23) HashAggregate [codegen id : 3] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [avg(inv_quantity_on_hand#2)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#20] +Results [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, avg(inv_quantity_on_hand#2)#20 AS qoh#21] + +(24) TakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] +Arguments: 100, [qoh#21 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/simplified.txt new file mode 100644 index 0000000000..febd5d2e62 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_datafusion/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (3) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (2) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometNativeScan: `spark_catalog`.`default`.`warehouse` diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..611efcf2c3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/explain.txt @@ -0,0 +1,141 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Expand (20) + +- * Project (19) + +- * BroadcastNestedLoopJoin Inner BuildRight (18) + :- * ColumnarToRow (14) + : +- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.inventory (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.item (9) + +- BroadcastExchange (17) + +- * ColumnarToRow (16) + +- CometScan parquet spark_catalog.default.warehouse (15) + + +(1) CometScan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#3)] +PushedFilters: [IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Condition : isnotnull(inv_item_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [inv_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3, d_date_sk#4] +Arguments: [inv_item_sk#1, inv_quantity_on_hand#2], [inv_item_sk#1, inv_quantity_on_hand#2] + +(9) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(12) CometBroadcastHashJoin +Left output [2]: [inv_item_sk#1, inv_quantity_on_hand#2] +Right output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [inv_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#2, i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10], [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(14) ColumnarToRow [codegen id : 2] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(15) CometScan parquet spark_catalog.default.warehouse +Output: [] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +ReadSchema: struct<> + +(16) ColumnarToRow [codegen id : 1] +Input: [] + +(17) BroadcastExchange +Input: [] +Arguments: IdentityBroadcastMode, [plan_id=1] + +(18) BroadcastNestedLoopJoin [codegen id : 2] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 2] +Output [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(20) Expand [codegen id : 2] +Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] + +(21) HashAggregate [codegen id : 2] +Input [6]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [partial_avg(inv_quantity_on_hand#2)] +Aggregate Attributes [2]: [sum#16, count#17] +Results [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] + +(22) Exchange +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(23) HashAggregate [codegen id : 3] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [avg(inv_quantity_on_hand#2)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#20] +Results [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, avg(inv_quantity_on_hand#2)#20 AS qoh#21] + +(24) TakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] +Arguments: 100, [qoh#21 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2d4d026928 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22.native_iceberg_compat/simplified.txt @@ -0,0 +1,31 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (3) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (2) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + BroadcastNestedLoopJoin + ColumnarToRow + InputAdapter + CometProject [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometScan parquet spark_catalog.default.warehouse diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/explain.txt new file mode 100644 index 0000000000..7eeb898493 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/explain.txt @@ -0,0 +1,250 @@ +== Physical Plan == +* ColumnarToRow (46) ++- CometTakeOrderedAndProject (45) + +- CometUnion (44) + :- CometHashAggregate (23) + : +- CometHashAggregate (22) + : +- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (14) + :- CometHashAggregate (28) + : +- CometExchange (27) + : +- CometHashAggregate (26) + : +- CometHashAggregate (25) + : +- ReusedExchange (24) + :- CometHashAggregate (33) + : +- CometExchange (32) + : +- CometHashAggregate (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + :- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometHashAggregate (35) + : +- ReusedExchange (34) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometHashAggregate (40) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5, d_month_seq#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [inv_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3], [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(10) CometFilter +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(12) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Right output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(14) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(15) CometFilter +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(16) CometBroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(17) CometBroadcastHashJoin +Left output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Right output [1]: [w_warehouse_sk#12] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#12], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] +Arguments: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(19) CometHashAggregate +Input [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] + +(20) CometExchange +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#13, count#14] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, i_category#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#13, count#14] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] + +(22) CometHashAggregate +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#15] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(qoh#15)] + +(23) CometHashAggregate +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#16, count#17] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(qoh#15)] + +(24) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#18, i_brand#19, i_class#20, i_category#21, sum#22, count#23] + +(25) CometHashAggregate +Input [6]: [i_product_name#18, i_brand#19, i_class#20, i_category#21, sum#22, count#23] +Keys [4]: [i_product_name#18, i_brand#19, i_class#20, i_category#21] +Functions [1]: [avg(inv_quantity_on_hand#24)] + +(26) CometHashAggregate +Input [4]: [i_product_name#18, i_brand#19, i_class#20, qoh#25] +Keys [3]: [i_product_name#18, i_brand#19, i_class#20] +Functions [1]: [partial_avg(qoh#25)] + +(27) CometExchange +Input [5]: [i_product_name#18, i_brand#19, i_class#20, sum#26, count#27] +Arguments: hashpartitioning(i_product_name#18, i_brand#19, i_class#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [5]: [i_product_name#18, i_brand#19, i_class#20, sum#26, count#27] +Keys [3]: [i_product_name#18, i_brand#19, i_class#20] +Functions [1]: [avg(qoh#25)] + +(29) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#28, i_brand#29, i_class#30, i_category#31, sum#32, count#33] + +(30) CometHashAggregate +Input [6]: [i_product_name#28, i_brand#29, i_class#30, i_category#31, sum#32, count#33] +Keys [4]: [i_product_name#28, i_brand#29, i_class#30, i_category#31] +Functions [1]: [avg(inv_quantity_on_hand#34)] + +(31) CometHashAggregate +Input [3]: [i_product_name#28, i_brand#29, qoh#35] +Keys [2]: [i_product_name#28, i_brand#29] +Functions [1]: [partial_avg(qoh#35)] + +(32) CometExchange +Input [4]: [i_product_name#28, i_brand#29, sum#36, count#37] +Arguments: hashpartitioning(i_product_name#28, i_brand#29, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(33) CometHashAggregate +Input [4]: [i_product_name#28, i_brand#29, sum#36, count#37] +Keys [2]: [i_product_name#28, i_brand#29] +Functions [1]: [avg(qoh#35)] + +(34) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#38, i_brand#39, i_class#40, i_category#41, sum#42, count#43] + +(35) CometHashAggregate +Input [6]: [i_product_name#38, i_brand#39, i_class#40, i_category#41, sum#42, count#43] +Keys [4]: [i_product_name#38, i_brand#39, i_class#40, i_category#41] +Functions [1]: [avg(inv_quantity_on_hand#44)] + +(36) CometHashAggregate +Input [2]: [i_product_name#38, qoh#45] +Keys [1]: [i_product_name#38] +Functions [1]: [partial_avg(qoh#45)] + +(37) CometExchange +Input [3]: [i_product_name#38, sum#46, count#47] +Arguments: hashpartitioning(i_product_name#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(38) CometHashAggregate +Input [3]: [i_product_name#38, sum#46, count#47] +Keys [1]: [i_product_name#38] +Functions [1]: [avg(qoh#45)] + +(39) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#48, i_brand#49, i_class#50, i_category#51, sum#52, count#53] + +(40) CometHashAggregate +Input [6]: [i_product_name#48, i_brand#49, i_class#50, i_category#51, sum#52, count#53] +Keys [4]: [i_product_name#48, i_brand#49, i_class#50, i_category#51] +Functions [1]: [avg(inv_quantity_on_hand#54)] + +(41) CometHashAggregate +Input [1]: [qoh#55] +Keys: [] +Functions [1]: [partial_avg(qoh#55)] + +(42) CometExchange +Input [2]: [sum#56, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(43) CometHashAggregate +Input [2]: [sum#56, count#57] +Keys: [] +Functions [1]: [avg(qoh#55)] + +(44) CometUnion +Child 0 Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] +Child 1 Input [5]: [i_product_name#18, i_brand#19, i_class#20, i_category#59, qoh#60] +Child 2 Input [5]: [i_product_name#28, i_brand#29, i_class#61, i_category#62, qoh#63] +Child 3 Input [5]: [i_product_name#38, i_brand#64, i_class#65, i_category#66, qoh#67] +Child 4 Input [5]: [i_product_name#68, i_brand#69, i_class#70, i_category#71, qoh#72] + +(45) CometTakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[qoh#58 ASC NULLS FIRST,i_product_name#11 ASC NULLS FIRST,i_brand#8 ASC NULLS FIRST,i_class#9 ASC NULLS FIRST,i_category#10 ASC NULLS FIRST], output=[i_product_name#11,i_brand#8,i_class#9,i_category#10,qoh#58]), [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58], 100, [qoh#58 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#8 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] + +(46) ColumnarToRow [codegen id : 1] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..43704500c6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_datafusion/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name,i_brand,i_class,i_category,qoh] + CometUnion [i_product_name,i_brand,i_class,i_category,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,sum,count,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(inv_quantity_on_hand)] + CometExchange [i_product_name,i_brand,i_class,i_category] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name,w_warehouse_sk] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometBroadcastExchange [w_warehouse_sk] #4 + CometFilter [w_warehouse_sk] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange [i_product_name,i_brand,i_class] #5 + CometHashAggregate [i_product_name,i_brand,i_class,sum,count,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,qoh,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange [i_product_name,i_brand] #6 + CometHashAggregate [i_product_name,i_brand,sum,count,qoh] + CometHashAggregate [i_product_name,i_brand,qoh,i_class,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange [i_product_name] #7 + CometHashAggregate [i_product_name,sum,count,qoh] + CometHashAggregate [i_product_name,qoh,i_brand,i_class,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange #8 + CometHashAggregate [sum,count,qoh] + CometHashAggregate [qoh,i_product_name,i_brand,i_class,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ab2d098345 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/explain.txt @@ -0,0 +1,263 @@ +== Physical Plan == +* ColumnarToRow (46) ++- CometTakeOrderedAndProject (45) + +- CometUnion (44) + :- CometHashAggregate (23) + : +- CometHashAggregate (22) + : +- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.inventory (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.item (9) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometScan parquet spark_catalog.default.warehouse (14) + :- CometHashAggregate (28) + : +- CometExchange (27) + : +- CometHashAggregate (26) + : +- CometHashAggregate (25) + : +- ReusedExchange (24) + :- CometHashAggregate (33) + : +- CometExchange (32) + : +- CometHashAggregate (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + :- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometHashAggregate (35) + : +- ReusedExchange (34) + +- CometHashAggregate (43) + +- CometExchange (42) + +- CometHashAggregate (41) + +- CometHashAggregate (40) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_month_seq#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [inv_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] +Arguments: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3], [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] + +(9) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(11) CometBroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(12) CometBroadcastHashJoin +Left output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Right output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_item_sk#1], [i_item_sk#7], Inner, BuildRight + +(13) CometProject +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(14) CometScan parquet spark_catalog.default.warehouse +Output [1]: [w_warehouse_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(15) CometFilter +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(16) CometBroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: [w_warehouse_sk#12] + +(17) CometBroadcastHashJoin +Left output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Right output [1]: [w_warehouse_sk#12] +Arguments: [inv_warehouse_sk#2], [w_warehouse_sk#12], Inner, BuildRight + +(18) CometProject +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] +Arguments: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11], [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(19) CometHashAggregate +Input [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] + +(20) CometExchange +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#13, count#14] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, i_category#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#13, count#14] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] + +(22) CometHashAggregate +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#15] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(qoh#15)] + +(23) CometHashAggregate +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#16, count#17] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(qoh#15)] + +(24) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#18, i_brand#19, i_class#20, i_category#21, sum#22, count#23] + +(25) CometHashAggregate +Input [6]: [i_product_name#18, i_brand#19, i_class#20, i_category#21, sum#22, count#23] +Keys [4]: [i_product_name#18, i_brand#19, i_class#20, i_category#21] +Functions [1]: [avg(inv_quantity_on_hand#24)] + +(26) CometHashAggregate +Input [4]: [i_product_name#18, i_brand#19, i_class#20, qoh#25] +Keys [3]: [i_product_name#18, i_brand#19, i_class#20] +Functions [1]: [partial_avg(qoh#25)] + +(27) CometExchange +Input [5]: [i_product_name#18, i_brand#19, i_class#20, sum#26, count#27] +Arguments: hashpartitioning(i_product_name#18, i_brand#19, i_class#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(28) CometHashAggregate +Input [5]: [i_product_name#18, i_brand#19, i_class#20, sum#26, count#27] +Keys [3]: [i_product_name#18, i_brand#19, i_class#20] +Functions [1]: [avg(qoh#25)] + +(29) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#28, i_brand#29, i_class#30, i_category#31, sum#32, count#33] + +(30) CometHashAggregate +Input [6]: [i_product_name#28, i_brand#29, i_class#30, i_category#31, sum#32, count#33] +Keys [4]: [i_product_name#28, i_brand#29, i_class#30, i_category#31] +Functions [1]: [avg(inv_quantity_on_hand#34)] + +(31) CometHashAggregate +Input [3]: [i_product_name#28, i_brand#29, qoh#35] +Keys [2]: [i_product_name#28, i_brand#29] +Functions [1]: [partial_avg(qoh#35)] + +(32) CometExchange +Input [4]: [i_product_name#28, i_brand#29, sum#36, count#37] +Arguments: hashpartitioning(i_product_name#28, i_brand#29, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(33) CometHashAggregate +Input [4]: [i_product_name#28, i_brand#29, sum#36, count#37] +Keys [2]: [i_product_name#28, i_brand#29] +Functions [1]: [avg(qoh#35)] + +(34) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#38, i_brand#39, i_class#40, i_category#41, sum#42, count#43] + +(35) CometHashAggregate +Input [6]: [i_product_name#38, i_brand#39, i_class#40, i_category#41, sum#42, count#43] +Keys [4]: [i_product_name#38, i_brand#39, i_class#40, i_category#41] +Functions [1]: [avg(inv_quantity_on_hand#44)] + +(36) CometHashAggregate +Input [2]: [i_product_name#38, qoh#45] +Keys [1]: [i_product_name#38] +Functions [1]: [partial_avg(qoh#45)] + +(37) CometExchange +Input [3]: [i_product_name#38, sum#46, count#47] +Arguments: hashpartitioning(i_product_name#38, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(38) CometHashAggregate +Input [3]: [i_product_name#38, sum#46, count#47] +Keys [1]: [i_product_name#38] +Functions [1]: [avg(qoh#45)] + +(39) ReusedExchange [Reuses operator id: 20] +Output [6]: [i_product_name#48, i_brand#49, i_class#50, i_category#51, sum#52, count#53] + +(40) CometHashAggregate +Input [6]: [i_product_name#48, i_brand#49, i_class#50, i_category#51, sum#52, count#53] +Keys [4]: [i_product_name#48, i_brand#49, i_class#50, i_category#51] +Functions [1]: [avg(inv_quantity_on_hand#54)] + +(41) CometHashAggregate +Input [1]: [qoh#55] +Keys: [] +Functions [1]: [partial_avg(qoh#55)] + +(42) CometExchange +Input [2]: [sum#56, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(43) CometHashAggregate +Input [2]: [sum#56, count#57] +Keys: [] +Functions [1]: [avg(qoh#55)] + +(44) CometUnion +Child 0 Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] +Child 1 Input [5]: [i_product_name#18, i_brand#19, i_class#20, i_category#59, qoh#60] +Child 2 Input [5]: [i_product_name#28, i_brand#29, i_class#61, i_category#62, qoh#63] +Child 3 Input [5]: [i_product_name#38, i_brand#64, i_class#65, i_category#66, qoh#67] +Child 4 Input [5]: [i_product_name#68, i_brand#69, i_class#70, i_category#71, qoh#72] + +(45) CometTakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[qoh#58 ASC NULLS FIRST,i_product_name#11 ASC NULLS FIRST,i_brand#8 ASC NULLS FIRST,i_class#9 ASC NULLS FIRST,i_category#10 ASC NULLS FIRST], output=[i_product_name#11,i_brand#8,i_class#9,i_category#10,qoh#58]), [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58], 100, [qoh#58 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#8 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] + +(46) ColumnarToRow [codegen id : 1] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#58] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8b3bcd4d27 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q22a.native_iceberg_compat/simplified.txt @@ -0,0 +1,48 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_product_name,i_brand,i_class,i_category,qoh] + CometUnion [i_product_name,i_brand,i_class,i_category,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,sum,count,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(inv_quantity_on_hand)] + CometExchange [i_product_name,i_brand,i_class,i_category] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,sum,count,inv_quantity_on_hand] + CometProject [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name,w_warehouse_sk] + CometProject [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + CometBroadcastHashJoin [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk,d_date_sk] + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [d_date_sk] #2 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #3 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometBroadcastExchange [w_warehouse_sk] #4 + CometFilter [w_warehouse_sk] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk] + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange [i_product_name,i_brand,i_class] #5 + CometHashAggregate [i_product_name,i_brand,i_class,sum,count,qoh] + CometHashAggregate [i_product_name,i_brand,i_class,qoh,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange [i_product_name,i_brand] #6 + CometHashAggregate [i_product_name,i_brand,sum,count,qoh] + CometHashAggregate [i_product_name,i_brand,qoh,i_class,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange [i_product_name] #7 + CometHashAggregate [i_product_name,sum,count,qoh] + CometHashAggregate [i_product_name,qoh,i_brand,i_class,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + CometHashAggregate [i_product_name,i_brand,i_class,i_category,qoh,sum,count,avg(qoh)] + CometExchange #8 + CometHashAggregate [sum,count,qoh] + CometHashAggregate [qoh,i_product_name,i_brand,i_class,i_category,sum,count,avg(inv_quantity_on_hand)] + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/explain.txt new file mode 100644 index 0000000000..8b77e3c896 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/explain.txt @@ -0,0 +1,396 @@ +== Physical Plan == +* ColumnarToRow (45) ++- CometSort (44) + +- CometColumnarExchange (43) + +- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(25) CometFilter +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : ((isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) AND isnotnull(c_birth_country#25)) + +(26) CometBroadcastExchange +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(29) ColumnarToRow [codegen id : 2] +Input [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Arguments: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(31) CometFilter +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Condition : ((isnotnull(ca_address_sk#26) AND isnotnull(ca_country#29)) AND isnotnull(ca_zip#28)) + +(32) ColumnarToRow [codegen id : 1] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(33) BroadcastExchange +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#30] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] + +(37) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, netpaid#33] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [partial_sum(netpaid#33)] +Aggregate Attributes [2]: [sum#34, isEmpty#35] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] + +(40) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [sum(netpaid#33)] +Aggregate Attributes [1]: [sum(netpaid#33)#38] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, sum(netpaid#33)#38 AS paid#39] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Condition : (isnotnull(paid#39) AND (cast(paid#39 as decimal(33,8)) > cast(Subquery scalar-subquery#40, [id=#41] as decimal(33,8)))) + +(43) CometColumnarExchange +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=6] + +(44) CometSort +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39], [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST] + +(45) ColumnarToRow [codegen id : 5] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#40, [id=#41] +* HashAggregate (72) ++- Exchange (71) + +- * HashAggregate (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * ColumnarToRow (63) + : +- CometProject (62) + : +- CometBroadcastHashJoin (61) + : :- CometProject (59) + : : +- CometBroadcastHashJoin (58) + : : :- CometProject (54) + : : : +- CometBroadcastHashJoin (53) + : : : :- CometProject (51) + : : : : +- CometSortMergeJoin (50) + : : : : :- CometSort (47) + : : : : : +- ReusedExchange (46) + : : : : +- CometSort (49) + : : : : +- ReusedExchange (48) + : : : +- ReusedExchange (52) + : : +- CometBroadcastExchange (57) + : : +- CometFilter (56) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (55) + : +- ReusedExchange (60) + +- ReusedExchange (64) + + +(46) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46] + +(47) CometSort +Input [5]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46] +Arguments: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46], [ss_ticket_number#45 ASC NULLS FIRST, ss_item_sk#42 ASC NULLS FIRST] + +(48) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#47, sr_ticket_number#48] + +(49) CometSort +Input [2]: [sr_item_sk#47, sr_ticket_number#48] +Arguments: [sr_item_sk#47, sr_ticket_number#48], [sr_ticket_number#48 ASC NULLS FIRST, sr_item_sk#47 ASC NULLS FIRST] + +(50) CometSortMergeJoin +Left output [5]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46] +Right output [2]: [sr_item_sk#47, sr_ticket_number#48] +Arguments: [ss_ticket_number#45, ss_item_sk#42], [sr_ticket_number#48, sr_item_sk#47], Inner + +(51) CometProject +Input [7]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46, sr_item_sk#47, sr_ticket_number#48] +Arguments: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46], [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46] + +(52) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#49, s_store_name#50, s_state#51, s_zip#52] + +(53) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46] +Right output [4]: [s_store_sk#49, s_store_name#50, s_state#51, s_zip#52] +Arguments: [ss_store_sk#44], [s_store_sk#49], Inner, BuildRight + +(54) CometProject +Input [8]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46, s_store_sk#49, s_store_name#50, s_state#51, s_zip#52] +Arguments: [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52], [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52] + +(55) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] + +(56) CometFilter +Input [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Condition : isnotnull(i_item_sk#53) + +(57) CometBroadcastExchange +Input [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] + +(58) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52] +Right output [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [ss_item_sk#42], [i_item_sk#53], Inner, BuildRight + +(59) CometProject +Input [12]: [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58], [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] + +(60) ReusedExchange [Reuses operator id: 26] +Output [5]: [c_customer_sk#59, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] + +(61) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Right output [5]: [c_customer_sk#59, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] +Arguments: [ss_customer_sk#43], [c_customer_sk#59], Inner, BuildRight + +(62) CometProject +Input [15]: [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_customer_sk#59, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] +Arguments: [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63], [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] + +(63) ColumnarToRow [codegen id : 2] +Input [13]: [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] + +(64) ReusedExchange [Reuses operator id: 33] +Output [4]: [ca_address_sk#64, ca_state#65, ca_zip#66, ca_country#67] + +(65) BroadcastHashJoin [codegen id : 2] +Left keys [3]: [c_current_addr_sk#60, c_birth_country#63, s_zip#52] +Right keys [3]: [ca_address_sk#64, upper(ca_country#67), ca_zip#66] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 2] +Output [11]: [ss_net_paid#46, s_store_name#50, s_state#51, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_first_name#61, c_last_name#62, ca_state#65] +Input [17]: [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63, ca_address_sk#64, ca_state#65, ca_zip#66, ca_country#67] + +(67) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#46, s_store_name#50, s_state#51, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_first_name#61, c_last_name#62, ca_state#65] +Keys [10]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#46))] +Aggregate Attributes [1]: [sum#68] +Results [11]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, sum#69] + +(68) Exchange +Input [11]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, sum#69] +Arguments: hashpartitioning(c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, sum#69] +Keys [10]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55] +Functions [1]: [sum(UnscaledValue(ss_net_paid#46))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#46))#32] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#46))#32,17,2) AS netpaid#70] + +(70) HashAggregate [codegen id : 3] +Input [1]: [netpaid#70] +Keys: [] +Functions [1]: [partial_avg(netpaid#70)] +Aggregate Attributes [2]: [sum#71, count#72] +Results [2]: [sum#73, count#74] + +(71) Exchange +Input [2]: [sum#73, count#74] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(72) HashAggregate [codegen id : 4] +Input [2]: [sum#73, count#74] +Keys: [] +Functions [1]: [avg(netpaid#70)] +Aggregate Attributes [1]: [avg(netpaid#70)#75] +Results [1]: [(0.05 * avg(netpaid#70)#75) AS (0.05 * avg(netpaid))#76] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c52c1b407a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_datafusion/simplified.txt @@ -0,0 +1,91 @@ +WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,s_store_name,paid] + CometColumnarExchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #10 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #11 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #4 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #5 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #6 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #12 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] #8 + InputAdapter + ReusedExchange [ca_address_sk,ca_state,ca_zip,ca_country] #9 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #3 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #4 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #5 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #6 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #7 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] #8 + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state,ca_zip,ca_country] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..522e028b5a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/explain.txt @@ -0,0 +1,417 @@ +== Physical Plan == +* ColumnarToRow (45) ++- CometSort (44) + +- CometColumnarExchange (43) + +- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * ColumnarToRow (29) + : +- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometProject (18) + : : : +- CometBroadcastHashJoin (17) + : : : :- CometProject (12) + : : : : +- CometSortMergeJoin (11) + : : : : :- CometSort (5) + : : : : : +- CometExchange (4) + : : : : : +- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometSort (10) + : : : : +- CometExchange (9) + : : : : +- CometProject (8) + : : : : +- CometFilter (7) + : : : : +- CometScan parquet spark_catalog.default.store_returns (6) + : : : +- CometBroadcastExchange (16) + : : : +- CometProject (15) + : : : +- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.store (13) + : : +- CometBroadcastExchange (21) + : : +- CometFilter (20) + : : +- CometScan parquet spark_catalog.default.item (19) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometScan parquet spark_catalog.default.customer (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometScan parquet spark_catalog.default.customer_address (30) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(3) CometProject +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(4) CometExchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(5) CometSort +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5], [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(6) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) CometFilter +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(8) CometProject +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_item_sk#7, sr_ticket_number#8] + +(9) CometExchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(10) CometSort +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_item_sk#7, sr_ticket_number#8], [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST] + +(11) CometSortMergeJoin +Left output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Right output [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_ticket_number#4, ss_item_sk#1], [sr_ticket_number#8, sr_item_sk#7], Inner + +(12) CometProject +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5], [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] + +(13) CometScan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(14) CometFilter +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(15) CometProject +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14], [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(16) CometBroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(17) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Right output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_store_sk#3], [s_store_sk#10], Inner, BuildRight + +(18) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14], [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] + +(19) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(21) CometBroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(22) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Right output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_item_sk#1], [i_item_sk#15], Inner, BuildRight + +(23) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20], [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) CometScan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(25) CometFilter +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : ((isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) AND isnotnull(c_birth_country#25)) + +(26) CometBroadcastExchange +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(27) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Right output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [ss_customer_sk#2], [c_customer_sk#21], Inner, BuildRight + +(28) CometProject +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25], [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(29) ColumnarToRow [codegen id : 2] +Input [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(30) CometScan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(31) CometFilter +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Condition : ((isnotnull(ca_address_sk#26) AND isnotnull(ca_country#29)) AND isnotnull(ca_zip#28)) + +(32) ColumnarToRow [codegen id : 1] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(33) BroadcastExchange +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 2] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 2] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(36) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#30] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] + +(37) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] + +(39) HashAggregate [codegen id : 3] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, netpaid#33] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [partial_sum(netpaid#33)] +Aggregate Attributes [2]: [sum#34, isEmpty#35] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] + +(40) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(41) HashAggregate [codegen id : 4] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [sum(netpaid#33)] +Aggregate Attributes [1]: [sum(netpaid#33)#38] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, sum(netpaid#33)#38 AS paid#39] + +(42) Filter [codegen id : 4] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Condition : (isnotnull(paid#39) AND (cast(paid#39 as decimal(33,8)) > cast(Subquery scalar-subquery#40, [id=#41] as decimal(33,8)))) + +(43) CometColumnarExchange +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=6] + +(44) CometSort +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39], [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST] + +(45) ColumnarToRow [codegen id : 5] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#40, [id=#41] +* HashAggregate (72) ++- Exchange (71) + +- * HashAggregate (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * ColumnarToRow (63) + : +- CometProject (62) + : +- CometBroadcastHashJoin (61) + : :- CometProject (59) + : : +- CometBroadcastHashJoin (58) + : : :- CometProject (54) + : : : +- CometBroadcastHashJoin (53) + : : : :- CometProject (51) + : : : : +- CometSortMergeJoin (50) + : : : : :- CometSort (47) + : : : : : +- ReusedExchange (46) + : : : : +- CometSort (49) + : : : : +- ReusedExchange (48) + : : : +- ReusedExchange (52) + : : +- CometBroadcastExchange (57) + : : +- CometFilter (56) + : : +- CometScan parquet spark_catalog.default.item (55) + : +- ReusedExchange (60) + +- ReusedExchange (64) + + +(46) ReusedExchange [Reuses operator id: 4] +Output [5]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46] + +(47) CometSort +Input [5]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46] +Arguments: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46], [ss_ticket_number#45 ASC NULLS FIRST, ss_item_sk#42 ASC NULLS FIRST] + +(48) ReusedExchange [Reuses operator id: 9] +Output [2]: [sr_item_sk#47, sr_ticket_number#48] + +(49) CometSort +Input [2]: [sr_item_sk#47, sr_ticket_number#48] +Arguments: [sr_item_sk#47, sr_ticket_number#48], [sr_ticket_number#48 ASC NULLS FIRST, sr_item_sk#47 ASC NULLS FIRST] + +(50) CometSortMergeJoin +Left output [5]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46] +Right output [2]: [sr_item_sk#47, sr_ticket_number#48] +Arguments: [ss_ticket_number#45, ss_item_sk#42], [sr_ticket_number#48, sr_item_sk#47], Inner + +(51) CometProject +Input [7]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_ticket_number#45, ss_net_paid#46, sr_item_sk#47, sr_ticket_number#48] +Arguments: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46], [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46] + +(52) ReusedExchange [Reuses operator id: 16] +Output [4]: [s_store_sk#49, s_store_name#50, s_state#51, s_zip#52] + +(53) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46] +Right output [4]: [s_store_sk#49, s_store_name#50, s_state#51, s_zip#52] +Arguments: [ss_store_sk#44], [s_store_sk#49], Inner, BuildRight + +(54) CometProject +Input [8]: [ss_item_sk#42, ss_customer_sk#43, ss_store_sk#44, ss_net_paid#46, s_store_sk#49, s_store_name#50, s_state#51, s_zip#52] +Arguments: [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52], [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52] + +(55) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(56) CometFilter +Input [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Condition : isnotnull(i_item_sk#53) + +(57) CometBroadcastExchange +Input [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] + +(58) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52] +Right output [6]: [i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [ss_item_sk#42], [i_item_sk#53], Inner, BuildRight + +(59) CometProject +Input [12]: [ss_item_sk#42, ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_item_sk#53, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Arguments: [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58], [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] + +(60) ReusedExchange [Reuses operator id: 26] +Output [5]: [c_customer_sk#59, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] + +(61) CometBroadcastHashJoin +Left output [10]: [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58] +Right output [5]: [c_customer_sk#59, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] +Arguments: [ss_customer_sk#43], [c_customer_sk#59], Inner, BuildRight + +(62) CometProject +Input [15]: [ss_customer_sk#43, ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_customer_sk#59, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] +Arguments: [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63], [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] + +(63) ColumnarToRow [codegen id : 2] +Input [13]: [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63] + +(64) ReusedExchange [Reuses operator id: 33] +Output [4]: [ca_address_sk#64, ca_state#65, ca_zip#66, ca_country#67] + +(65) BroadcastHashJoin [codegen id : 2] +Left keys [3]: [c_current_addr_sk#60, c_birth_country#63, s_zip#52] +Right keys [3]: [ca_address_sk#64, upper(ca_country#67), ca_zip#66] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 2] +Output [11]: [ss_net_paid#46, s_store_name#50, s_state#51, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_first_name#61, c_last_name#62, ca_state#65] +Input [17]: [ss_net_paid#46, s_store_name#50, s_state#51, s_zip#52, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_current_addr_sk#60, c_first_name#61, c_last_name#62, c_birth_country#63, ca_address_sk#64, ca_state#65, ca_zip#66, ca_country#67] + +(67) HashAggregate [codegen id : 2] +Input [11]: [ss_net_paid#46, s_store_name#50, s_state#51, i_current_price#54, i_size#55, i_color#56, i_units#57, i_manager_id#58, c_first_name#61, c_last_name#62, ca_state#65] +Keys [10]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#46))] +Aggregate Attributes [1]: [sum#68] +Results [11]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, sum#69] + +(68) Exchange +Input [11]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, sum#69] +Arguments: hashpartitioning(c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(69) HashAggregate [codegen id : 3] +Input [11]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55, sum#69] +Keys [10]: [c_last_name#62, c_first_name#61, s_store_name#50, ca_state#65, s_state#51, i_color#56, i_current_price#54, i_manager_id#58, i_units#57, i_size#55] +Functions [1]: [sum(UnscaledValue(ss_net_paid#46))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#46))#32] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#46))#32,17,2) AS netpaid#70] + +(70) HashAggregate [codegen id : 3] +Input [1]: [netpaid#70] +Keys: [] +Functions [1]: [partial_avg(netpaid#70)] +Aggregate Attributes [2]: [sum#71, count#72] +Results [2]: [sum#73, count#74] + +(71) Exchange +Input [2]: [sum#73, count#74] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(72) HashAggregate [codegen id : 4] +Input [2]: [sum#73, count#74] +Keys: [] +Functions [1]: [avg(netpaid#70)] +Aggregate Attributes [1]: [avg(netpaid#70)#75] +Results [1]: [(0.05 * avg(netpaid#70)#75) AS (0.05 * avg(netpaid))#76] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..f4bcae0f13 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q24.native_iceberg_compat/simplified.txt @@ -0,0 +1,91 @@ +WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,s_store_name,paid] + CometColumnarExchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (4) + Filter [paid] + Subquery #1 + WholeStageCodegen (4) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #10 + WholeStageCodegen (3) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #11 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #4 + CometSort [sr_item_sk,sr_ticket_number] + ReusedExchange [sr_item_sk,sr_ticket_number] #5 + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #6 + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #12 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + ReusedExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] #8 + InputAdapter + ReusedExchange [ca_address_sk,ca_state,ca_zip,ca_country] #9 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #2 + WholeStageCodegen (3) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #3 + WholeStageCodegen (2) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + CometProject [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometBroadcastHashJoin [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometProject [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometProject [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid,s_store_sk,s_store_name,s_state,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometExchange [ss_ticket_number,ss_item_sk] #4 + CometProject [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + CometFilter [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #5 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [s_store_sk,s_store_name,s_state,s_zip] #6 + CometProject [s_store_sk,s_store_name,s_state,s_zip] + CometFilter [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + CometBroadcastExchange [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] #7 + CometFilter [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] #8 + CometFilter [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state,ca_zip,ca_country] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/explain.txt new file mode 100644 index 0000000000..8b866beb2d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/explain.txt @@ -0,0 +1,360 @@ +== Physical Plan == +* ColumnarToRow (69) ++- CometTakeOrderedAndProject (68) + +- CometUnion (67) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (3) + : : : +- CometBroadcastExchange (12) + : : : +- CometProject (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (20) + :- CometHashAggregate (47) + : +- CometExchange (46) + : +- CometHashAggregate (45) + : +- CometProject (44) + : +- CometBroadcastHashJoin (43) + : :- CometProject (41) + : : +- CometBroadcastHashJoin (40) + : : :- CometProject (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometProject (32) + : : : : +- CometBroadcastHashJoin (31) + : : : : :- CometFilter (29) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (28) + : : : : +- ReusedExchange (30) + : : : +- ReusedExchange (33) + : : +- CometBroadcastExchange (39) + : : +- CometProject (38) + : : +- CometFilter (37) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (36) + : +- ReusedExchange (42) + +- CometHashAggregate (66) + +- CometExchange (65) + +- CometHashAggregate (64) + +- CometProject (63) + +- CometBroadcastHashJoin (62) + :- CometProject (58) + : +- CometBroadcastHashJoin (57) + : :- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometProject (52) + : : : +- CometBroadcastHashJoin (51) + : : : :- CometFilter (49) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (48) + : : : +- ReusedExchange (50) + : : +- ReusedExchange (53) + : +- ReusedExchange (56) + +- CometBroadcastExchange (61) + +- CometFilter (60) + +- CometNativeScan: `spark_catalog`.`default`.`item` (59) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Arguments: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13, d_year#14] + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(16) CometFilter +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [s_store_sk#15, s_state#16] +Arguments: [ss_store_sk#3], [s_store_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] +Arguments: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16], [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] + +(20) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(21) CometFilter +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Right output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] +Arguments: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22], [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] + +(25) CometHashAggregate +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] + +(26) CometExchange +Input [10]: [i_item_id#18, s_state#16, sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [10]: [i_item_id#18, s_state#16, sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] + +(28) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Arguments: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] + +(29) CometFilter +Input [8]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Condition : ((isnotnull(ss_cdemo_sk#32) AND isnotnull(ss_store_sk#33)) AND isnotnull(ss_item_sk#31)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [cd_demo_sk#39] + +(31) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Right output [1]: [cd_demo_sk#39] +Arguments: [ss_cdemo_sk#32], [cd_demo_sk#39], Inner, BuildRight + +(32) CometProject +Input [9]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38, cd_demo_sk#39] +Arguments: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38], [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [d_date_sk#40] + +(34) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Right output [1]: [d_date_sk#40] +Arguments: [ss_sold_date_sk#38], [d_date_sk#40], Inner, BuildRight + +(35) CometProject +Input [8]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38, d_date_sk#40] +Arguments: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37], [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] + +(36) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#41, s_state#42] +Arguments: [s_store_sk#41, s_state#42] + +(37) CometFilter +Input [2]: [s_store_sk#41, s_state#42] +Condition : ((isnotnull(s_state#42) AND (s_state#42 = TN)) AND isnotnull(s_store_sk#41)) + +(38) CometProject +Input [2]: [s_store_sk#41, s_state#42] +Arguments: [s_store_sk#41], [s_store_sk#41] + +(39) CometBroadcastExchange +Input [1]: [s_store_sk#41] +Arguments: [s_store_sk#41] + +(40) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] +Right output [1]: [s_store_sk#41] +Arguments: [ss_store_sk#33], [s_store_sk#41], Inner, BuildRight + +(41) CometProject +Input [7]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, s_store_sk#41] +Arguments: [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37], [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] + +(42) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#43, i_item_id#44] + +(43) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] +Right output [2]: [i_item_sk#43, i_item_id#44] +Arguments: [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight + +(44) CometProject +Input [7]: [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, i_item_sk#43, i_item_id#44] +Arguments: [i_item_id#44, agg1#45, agg2#46, agg3#47, agg4#48], [i_item_id#44, ss_quantity#34 AS agg1#45, ss_list_price#35 AS agg2#46, ss_coupon_amt#37 AS agg3#47, ss_sales_price#36 AS agg4#48] + +(45) CometHashAggregate +Input [5]: [i_item_id#44, agg1#45, agg2#46, agg3#47, agg4#48] +Keys [1]: [i_item_id#44] +Functions [4]: [partial_avg(agg1#45), partial_avg(UnscaledValue(agg2#46)), partial_avg(UnscaledValue(agg3#47)), partial_avg(UnscaledValue(agg4#48))] + +(46) CometExchange +Input [9]: [i_item_id#44, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] +Arguments: hashpartitioning(i_item_id#44, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(47) CometHashAggregate +Input [9]: [i_item_id#44, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] +Keys [1]: [i_item_id#44] +Functions [4]: [avg(agg1#45), avg(UnscaledValue(agg2#46)), avg(UnscaledValue(agg3#47)), avg(UnscaledValue(agg4#48))] + +(48) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [8]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Arguments: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] + +(49) CometFilter +Input [8]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Condition : ((isnotnull(ss_cdemo_sk#58) AND isnotnull(ss_store_sk#59)) AND isnotnull(ss_item_sk#57)) + +(50) ReusedExchange [Reuses operator id: 6] +Output [1]: [cd_demo_sk#65] + +(51) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Right output [1]: [cd_demo_sk#65] +Arguments: [ss_cdemo_sk#58], [cd_demo_sk#65], Inner, BuildRight + +(52) CometProject +Input [9]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64, cd_demo_sk#65] +Arguments: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64], [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] + +(53) ReusedExchange [Reuses operator id: 12] +Output [1]: [d_date_sk#66] + +(54) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Right output [1]: [d_date_sk#66] +Arguments: [ss_sold_date_sk#64], [d_date_sk#66], Inner, BuildRight + +(55) CometProject +Input [8]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64, d_date_sk#66] +Arguments: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63], [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] + +(56) ReusedExchange [Reuses operator id: 39] +Output [1]: [s_store_sk#67] + +(57) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] +Right output [1]: [s_store_sk#67] +Arguments: [ss_store_sk#59], [s_store_sk#67], Inner, BuildRight + +(58) CometProject +Input [7]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, s_store_sk#67] +Arguments: [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63], [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] + +(59) CometNativeScan: `spark_catalog`.`default`.`item` +Output [1]: [i_item_sk#68] +Arguments: [i_item_sk#68] + +(60) CometFilter +Input [1]: [i_item_sk#68] +Condition : isnotnull(i_item_sk#68) + +(61) CometBroadcastExchange +Input [1]: [i_item_sk#68] +Arguments: [i_item_sk#68] + +(62) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] +Right output [1]: [i_item_sk#68] +Arguments: [ss_item_sk#57], [i_item_sk#68], Inner, BuildRight + +(63) CometProject +Input [6]: [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, i_item_sk#68] +Arguments: [agg1#69, agg2#70, agg3#71, agg4#72], [ss_quantity#60 AS agg1#69, ss_list_price#61 AS agg2#70, ss_coupon_amt#63 AS agg3#71, ss_sales_price#62 AS agg4#72] + +(64) CometHashAggregate +Input [4]: [agg1#69, agg2#70, agg3#71, agg4#72] +Keys: [] +Functions [4]: [partial_avg(agg1#69), partial_avg(UnscaledValue(agg2#70)), partial_avg(UnscaledValue(agg3#71)), partial_avg(UnscaledValue(agg4#72))] + +(65) CometExchange +Input [8]: [sum#73, count#74, sum#75, count#76, sum#77, count#78, sum#79, count#80] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(66) CometHashAggregate +Input [8]: [sum#73, count#74, sum#75, count#76, sum#77, count#78, sum#79, count#80] +Keys: [] +Functions [4]: [avg(agg1#69), avg(UnscaledValue(agg2#70)), avg(UnscaledValue(agg3#71)), avg(UnscaledValue(agg4#72))] + +(67) CometUnion +Child 0 Input [7]: [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] +Child 1 Input [7]: [i_item_id#44, s_state#86, g_state#87, agg1#88, agg2#89, agg3#90, agg4#91] +Child 2 Input [7]: [i_item_id#92, s_state#93, g_state#94, agg1#95, agg2#96, agg3#97, agg4#98] + +(68) CometTakeOrderedAndProject +Input [7]: [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#18 ASC NULLS FIRST,s_state#16 ASC NULLS FIRST], output=[i_item_id#18,s_state#16,g_state#81,agg1#82,agg2#83,agg3#84,agg4#85]), [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85], 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] + +(69) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..93723f1654 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_datafusion/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + CometUnion [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4))] + CometExchange [i_item_id,s_state] #1 + CometHashAggregate [i_item_id,s_state,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4] + CometProject [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [i_item_id,s_state,agg1,agg2,agg3,agg4] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_state] #4 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id] + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4))] + CometExchange [i_item_id] #6 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4] + CometProject [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [i_item_id,agg1,agg2,agg3,agg4] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [cd_demo_sk] #2 + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [s_store_sk] #7 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + ReusedExchange [i_item_sk,i_item_id] #5 + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4))] + CometExchange #8 + CometHashAggregate [sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4] + CometProject [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [agg1,agg2,agg3,agg4] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [cd_demo_sk] #2 + ReusedExchange [d_date_sk] #3 + ReusedExchange [s_store_sk] #7 + CometBroadcastExchange [i_item_sk] #9 + CometFilter [i_item_sk] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..49b931772c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/explain.txt @@ -0,0 +1,390 @@ +== Physical Plan == +* ColumnarToRow (69) ++- CometTakeOrderedAndProject (68) + +- CometUnion (67) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometProject (24) + : +- CometBroadcastHashJoin (23) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometProject (14) + : : : +- CometBroadcastHashJoin (13) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.customer_demographics (3) + : : : +- CometBroadcastExchange (12) + : : : +- CometProject (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.date_dim (9) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometScan parquet spark_catalog.default.store (15) + : +- CometBroadcastExchange (22) + : +- CometFilter (21) + : +- CometScan parquet spark_catalog.default.item (20) + :- CometHashAggregate (47) + : +- CometExchange (46) + : +- CometHashAggregate (45) + : +- CometProject (44) + : +- CometBroadcastHashJoin (43) + : :- CometProject (41) + : : +- CometBroadcastHashJoin (40) + : : :- CometProject (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometProject (32) + : : : : +- CometBroadcastHashJoin (31) + : : : : :- CometFilter (29) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (28) + : : : : +- ReusedExchange (30) + : : : +- ReusedExchange (33) + : : +- CometBroadcastExchange (39) + : : +- CometProject (38) + : : +- CometFilter (37) + : : +- CometScan parquet spark_catalog.default.store (36) + : +- ReusedExchange (42) + +- CometHashAggregate (66) + +- CometExchange (65) + +- CometHashAggregate (64) + +- CometProject (63) + +- CometBroadcastHashJoin (62) + :- CometProject (58) + : +- CometBroadcastHashJoin (57) + : :- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometProject (52) + : : : +- CometBroadcastHashJoin (51) + : : : :- CometFilter (49) + : : : : +- CometScan parquet spark_catalog.default.store_sales (48) + : : : +- ReusedExchange (50) + : : +- ReusedExchange (53) + : +- ReusedExchange (56) + +- CometBroadcastExchange (61) + +- CometFilter (60) + +- CometScan parquet spark_catalog.default.item (59) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary )) AND isnotnull(cd_demo_sk#9)) + +(5) CometProject +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Arguments: [cd_demo_sk#9], [cd_demo_sk#9] + +(6) CometBroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: [cd_demo_sk#9] + +(7) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [cd_demo_sk#9] +Arguments: [ss_cdemo_sk#2], [cd_demo_sk#9], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) + +(11) CometProject +Input [2]: [d_date_sk#13, d_year#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(12) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(13) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#8], [d_date_sk#13], Inner, BuildRight + +(14) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7], [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] + +(15) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: [s_store_sk#15, s_state#16] + +(18) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Right output [2]: [s_store_sk#15, s_state#16] +Arguments: [ss_store_sk#3], [s_store_sk#15], Inner, BuildRight + +(19) CometProject +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] +Arguments: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16], [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] + +(20) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(21) CometFilter +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(22) CometBroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: [i_item_sk#17, i_item_id#18] + +(23) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Right output [2]: [i_item_sk#17, i_item_id#18] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(24) CometProject +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] +Arguments: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22], [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] + +(25) CometHashAggregate +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] + +(26) CometExchange +Input [10]: [i_item_id#18, s_state#16, sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(27) CometHashAggregate +Input [10]: [i_item_id#18, s_state#16, sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] + +(28) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#38)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(29) CometFilter +Input [8]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Condition : ((isnotnull(ss_cdemo_sk#32) AND isnotnull(ss_store_sk#33)) AND isnotnull(ss_item_sk#31)) + +(30) ReusedExchange [Reuses operator id: 6] +Output [1]: [cd_demo_sk#39] + +(31) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Right output [1]: [cd_demo_sk#39] +Arguments: [ss_cdemo_sk#32], [cd_demo_sk#39], Inner, BuildRight + +(32) CometProject +Input [9]: [ss_item_sk#31, ss_cdemo_sk#32, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38, cd_demo_sk#39] +Arguments: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38], [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] + +(33) ReusedExchange [Reuses operator id: 12] +Output [1]: [d_date_sk#40] + +(34) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38] +Right output [1]: [d_date_sk#40] +Arguments: [ss_sold_date_sk#38], [d_date_sk#40], Inner, BuildRight + +(35) CometProject +Input [8]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, ss_sold_date_sk#38, d_date_sk#40] +Arguments: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37], [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] + +(36) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#41, s_state#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(37) CometFilter +Input [2]: [s_store_sk#41, s_state#42] +Condition : ((isnotnull(s_state#42) AND (s_state#42 = TN)) AND isnotnull(s_store_sk#41)) + +(38) CometProject +Input [2]: [s_store_sk#41, s_state#42] +Arguments: [s_store_sk#41], [s_store_sk#41] + +(39) CometBroadcastExchange +Input [1]: [s_store_sk#41] +Arguments: [s_store_sk#41] + +(40) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] +Right output [1]: [s_store_sk#41] +Arguments: [ss_store_sk#33], [s_store_sk#41], Inner, BuildRight + +(41) CometProject +Input [7]: [ss_item_sk#31, ss_store_sk#33, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, s_store_sk#41] +Arguments: [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37], [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] + +(42) ReusedExchange [Reuses operator id: 22] +Output [2]: [i_item_sk#43, i_item_id#44] + +(43) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37] +Right output [2]: [i_item_sk#43, i_item_id#44] +Arguments: [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight + +(44) CometProject +Input [7]: [ss_item_sk#31, ss_quantity#34, ss_list_price#35, ss_sales_price#36, ss_coupon_amt#37, i_item_sk#43, i_item_id#44] +Arguments: [i_item_id#44, agg1#45, agg2#46, agg3#47, agg4#48], [i_item_id#44, ss_quantity#34 AS agg1#45, ss_list_price#35 AS agg2#46, ss_coupon_amt#37 AS agg3#47, ss_sales_price#36 AS agg4#48] + +(45) CometHashAggregate +Input [5]: [i_item_id#44, agg1#45, agg2#46, agg3#47, agg4#48] +Keys [1]: [i_item_id#44] +Functions [4]: [partial_avg(agg1#45), partial_avg(UnscaledValue(agg2#46)), partial_avg(UnscaledValue(agg3#47)), partial_avg(UnscaledValue(agg4#48))] + +(46) CometExchange +Input [9]: [i_item_id#44, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] +Arguments: hashpartitioning(i_item_id#44, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(47) CometHashAggregate +Input [9]: [i_item_id#44, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] +Keys [1]: [i_item_id#44] +Functions [4]: [avg(agg1#45), avg(UnscaledValue(agg2#46)), avg(UnscaledValue(agg3#47)), avg(UnscaledValue(agg4#48))] + +(48) CometScan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#64)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(49) CometFilter +Input [8]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Condition : ((isnotnull(ss_cdemo_sk#58) AND isnotnull(ss_store_sk#59)) AND isnotnull(ss_item_sk#57)) + +(50) ReusedExchange [Reuses operator id: 6] +Output [1]: [cd_demo_sk#65] + +(51) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Right output [1]: [cd_demo_sk#65] +Arguments: [ss_cdemo_sk#58], [cd_demo_sk#65], Inner, BuildRight + +(52) CometProject +Input [9]: [ss_item_sk#57, ss_cdemo_sk#58, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64, cd_demo_sk#65] +Arguments: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64], [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] + +(53) ReusedExchange [Reuses operator id: 12] +Output [1]: [d_date_sk#66] + +(54) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64] +Right output [1]: [d_date_sk#66] +Arguments: [ss_sold_date_sk#64], [d_date_sk#66], Inner, BuildRight + +(55) CometProject +Input [8]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, ss_sold_date_sk#64, d_date_sk#66] +Arguments: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63], [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] + +(56) ReusedExchange [Reuses operator id: 39] +Output [1]: [s_store_sk#67] + +(57) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] +Right output [1]: [s_store_sk#67] +Arguments: [ss_store_sk#59], [s_store_sk#67], Inner, BuildRight + +(58) CometProject +Input [7]: [ss_item_sk#57, ss_store_sk#59, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, s_store_sk#67] +Arguments: [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63], [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] + +(59) CometScan parquet spark_catalog.default.item +Output [1]: [i_item_sk#68] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(60) CometFilter +Input [1]: [i_item_sk#68] +Condition : isnotnull(i_item_sk#68) + +(61) CometBroadcastExchange +Input [1]: [i_item_sk#68] +Arguments: [i_item_sk#68] + +(62) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63] +Right output [1]: [i_item_sk#68] +Arguments: [ss_item_sk#57], [i_item_sk#68], Inner, BuildRight + +(63) CometProject +Input [6]: [ss_item_sk#57, ss_quantity#60, ss_list_price#61, ss_sales_price#62, ss_coupon_amt#63, i_item_sk#68] +Arguments: [agg1#69, agg2#70, agg3#71, agg4#72], [ss_quantity#60 AS agg1#69, ss_list_price#61 AS agg2#70, ss_coupon_amt#63 AS agg3#71, ss_sales_price#62 AS agg4#72] + +(64) CometHashAggregate +Input [4]: [agg1#69, agg2#70, agg3#71, agg4#72] +Keys: [] +Functions [4]: [partial_avg(agg1#69), partial_avg(UnscaledValue(agg2#70)), partial_avg(UnscaledValue(agg3#71)), partial_avg(UnscaledValue(agg4#72))] + +(65) CometExchange +Input [8]: [sum#73, count#74, sum#75, count#76, sum#77, count#78, sum#79, count#80] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(66) CometHashAggregate +Input [8]: [sum#73, count#74, sum#75, count#76, sum#77, count#78, sum#79, count#80] +Keys: [] +Functions [4]: [avg(agg1#69), avg(UnscaledValue(agg2#70)), avg(UnscaledValue(agg3#71)), avg(UnscaledValue(agg4#72))] + +(67) CometUnion +Child 0 Input [7]: [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] +Child 1 Input [7]: [i_item_id#44, s_state#86, g_state#87, agg1#88, agg2#89, agg3#90, agg4#91] +Child 2 Input [7]: [i_item_id#92, s_state#93, g_state#94, agg1#95, agg2#96, agg3#97, agg4#98] + +(68) CometTakeOrderedAndProject +Input [7]: [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[i_item_id#18 ASC NULLS FIRST,s_state#16 ASC NULLS FIRST], output=[i_item_id#18,s_state#16,g_state#81,agg1#82,agg2#83,agg3#84,agg4#85]), [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85], 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] + +(69) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#18, s_state#16, g_state#81, agg1#82, agg2#83, agg3#84, agg4#85] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9ef5d68c7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q27a.native_iceberg_compat/simplified.txt @@ -0,0 +1,71 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + CometUnion [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4))] + CometExchange [i_item_id,s_state] #1 + CometHashAggregate [i_item_id,s_state,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4] + CometProject [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [i_item_id,s_state,agg1,agg2,agg3,agg4] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk,s_state] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastExchange [cd_demo_sk] #2 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_state] #4 + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + CometBroadcastExchange [i_item_sk,i_item_id] #5 + CometFilter [i_item_sk,i_item_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id] + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4))] + CometExchange [i_item_id] #6 + CometHashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4] + CometProject [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [i_item_id,agg1,agg2,agg3,agg4] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk,i_item_id] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [cd_demo_sk] #2 + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [s_store_sk] #7 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + ReusedExchange [i_item_sk,i_item_id] #5 + CometHashAggregate [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count,avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4))] + CometExchange #8 + CometHashAggregate [sum,count,sum,count,sum,count,sum,count,agg1,agg2,agg3,agg4] + CometProject [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [agg1,agg2,agg3,agg4] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_sk] + CometProject [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_store_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk,cd_demo_sk] + CometFilter [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [cd_demo_sk] #2 + ReusedExchange [d_date_sk] #3 + ReusedExchange [s_store_sk] #7 + CometBroadcastExchange [i_item_sk] #9 + CometFilter [i_item_sk] + CometScan parquet spark_catalog.default.item [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/explain.txt new file mode 100644 index 0000000000..15052b5542 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/explain.txt @@ -0,0 +1,168 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometNativeScan: `spark_catalog`.`default`.`customer` (25) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6, d_year#7, d_dom#8] + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9, s_county#10] + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 15) AND (cnt#16 <= 20)) + +(25) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/simplified.txt new file mode 100644 index 0000000000..726262ac46 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_datafusion/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0860b110b3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/explain.txt @@ -0,0 +1,184 @@ +== Physical Plan == +* ColumnarToRow (32) ++- CometSort (31) + +- CometColumnarExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (24) + : +- CometHashAggregate (23) + : +- CometExchange (22) + : +- CometHashAggregate (21) + : +- CometProject (20) + : +- CometBroadcastHashJoin (19) + : :- CometProject (14) + : : +- CometBroadcastHashJoin (13) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (12) + : : +- CometProject (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.store (9) + : +- CometBroadcastExchange (18) + : +- CometProject (17) + : +- CometFilter (16) + : +- CometScan parquet spark_catalog.default.household_demographics (15) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometScan parquet spark_catalog.default.customer (25) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(11) CometProject +Input [2]: [s_store_sk#9, s_county#10] +Arguments: [s_store_sk#9], [s_store_sk#9] + +(12) CometBroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: [s_store_sk#9] + +(13) CometBroadcastHashJoin +Left output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Right output [1]: [s_store_sk#9] +Arguments: [ss_store_sk#3], [s_store_sk#9], Inner, BuildRight + +(14) CometProject +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] +Arguments: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4], [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] + +(15) CometScan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(16) CometFilter +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN (knownfloatingpointnormalized(normalizenanandzero((cast(hd_dep_count#13 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(hd_vehicle_count#14 as double)))))) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(17) CometProject +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Arguments: [hd_demo_sk#11], [hd_demo_sk#11] + +(18) CometBroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: [hd_demo_sk#11] + +(19) CometBroadcastHashJoin +Left output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Right output [1]: [hd_demo_sk#11] +Arguments: [ss_hdemo_sk#2], [hd_demo_sk#11], Inner, BuildRight + +(20) CometProject +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Arguments: [ss_customer_sk#1, ss_ticket_number#4], [ss_customer_sk#1, ss_ticket_number#4] + +(21) CometHashAggregate +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] + +(22) CometExchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(23) CometHashAggregate +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] + +(24) CometFilter +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Condition : ((cnt#16 >= 15) AND (cnt#16 <= 20)) + +(25) CometScan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(26) CometFilter +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Condition : isnotnull(c_customer_sk#17) + +(27) CometBroadcastExchange +Input [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] + +(28) CometBroadcastHashJoin +Left output [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16] +Right output [5]: [c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [ss_customer_sk#1], [c_customer_sk#17], Inner, BuildRight + +(29) CometProject +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#16, c_customer_sk#17, c_salutation#18, c_first_name#19, c_last_name#20, c_preferred_cust_flag#21] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + +(30) CometColumnarExchange +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: rangepartitioning(c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=2] + +(31) CometSort +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] +Arguments: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16], [c_last_name#20 ASC NULLS FIRST, c_first_name#19 ASC NULLS FIRST, c_salutation#18 ASC NULLS FIRST, c_preferred_cust_flag#21 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST] + +(32) ColumnarToRow [codegen id : 1] +Input [6]: [c_last_name#20, c_first_name#19, c_salutation#18, c_preferred_cust_flag#21, ss_ticket_number#4, cnt#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..50b93575c6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q34.native_iceberg_compat/simplified.txt @@ -0,0 +1,34 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometColumnarExchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number] #1 + CometProject [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + CometBroadcastHashJoin [ss_ticket_number,ss_customer_sk,cnt,c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometFilter [ss_ticket_number,ss_customer_sk,cnt] + CometHashAggregate [ss_ticket_number,ss_customer_sk,cnt,count,count(1)] + CometExchange [ss_ticket_number,ss_customer_sk] #2 + CometHashAggregate [ss_ticket_number,ss_customer_sk,count] + CometProject [ss_customer_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,hd_demo_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,s_store_sk] + CometProject [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + CometBroadcastHashJoin [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk,d_date_sk] + CometFilter [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_dom] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + CometBroadcastExchange [s_store_sk] #4 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_county] + CometScan parquet spark_catalog.default.store [s_store_sk,s_county] + CometBroadcastExchange [hd_demo_sk] #5 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + CometBroadcastExchange [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] #6 + CometFilter [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/explain.txt new file mode 100644 index 0000000000..5bad8f9a0c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/explain.txt @@ -0,0 +1,209 @@ +== Physical Plan == +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * Filter (22) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (21) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (13) + : : : +- ReusedExchange (14) + : : +- ReusedExchange (20) + : +- BroadcastExchange (27) + : +- * ColumnarToRow (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- BroadcastExchange (33) + +- * ColumnarToRow (32) + +- CometFilter (31) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (30) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Arguments: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#6, ss_sold_date_sk#7] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8, d_year#9, d_qoy#10] + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Arguments: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) ReusedExchange [Reuses operator id: 18] +Output [1]: [cs_ship_customer_sk#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(22) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(23) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#15, ca_state#16] +Arguments: [ca_address_sk#15, ca_state#16] + +(25) CometFilter +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : isnotnull(ca_address_sk#15) + +(26) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#15, ca_state#16] + +(27) BroadcastExchange +Input [2]: [ca_address_sk#15, ca_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#15] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, ca_state#16] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#15, ca_state#16] + +(30) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Arguments: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(31) CometFilter +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Condition : isnotnull(cd_demo_sk#17) + +(32) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(33) BroadcastExchange +Input [6]: [cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(34) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 5] +Output [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Input [8]: [c_current_cdemo_sk#4, ca_state#16, cd_demo_sk#17, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] + +(36) HashAggregate [codegen id : 5] +Input [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Keys [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#20), partial_max(cd_dep_count#20), partial_sum(cd_dep_count#20), partial_avg(cd_dep_employed_count#21), partial_max(cd_dep_employed_count#21), partial_sum(cd_dep_employed_count#21), partial_avg(cd_dep_college_count#22), partial_max(cd_dep_college_count#22), partial_sum(cd_dep_college_count#22)] +Aggregate Attributes [13]: [count#23, sum#24, count#25, max#26, sum#27, sum#28, count#29, max#30, sum#31, sum#32, count#33, max#34, sum#35] +Results [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, sum#37, count#38, max#39, sum#40, sum#41, count#42, max#43, sum#44, sum#45, count#46, max#47, sum#48] + +(37) Exchange +Input [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, sum#37, count#38, max#39, sum#40, sum#41, count#42, max#43, sum#44, sum#45, count#46, max#47, sum#48] +Arguments: hashpartitioning(ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(38) HashAggregate [codegen id : 6] +Input [19]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22, count#36, sum#37, count#38, max#39, sum#40, sum#41, count#42, max#43, sum#44, sum#45, count#46, max#47, sum#48] +Keys [6]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cd_dep_employed_count#21, cd_dep_college_count#22] +Functions [10]: [count(1), avg(cd_dep_count#20), max(cd_dep_count#20), sum(cd_dep_count#20), avg(cd_dep_employed_count#21), max(cd_dep_employed_count#21), sum(cd_dep_employed_count#21), avg(cd_dep_college_count#22), max(cd_dep_college_count#22), sum(cd_dep_college_count#22)] +Aggregate Attributes [10]: [count(1)#49, avg(cd_dep_count#20)#50, max(cd_dep_count#20)#51, sum(cd_dep_count#20)#52, avg(cd_dep_employed_count#21)#53, max(cd_dep_employed_count#21)#54, sum(cd_dep_employed_count#21)#55, avg(cd_dep_college_count#22)#56, max(cd_dep_college_count#22)#57, sum(cd_dep_college_count#22)#58] +Results [18]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, count(1)#49 AS cnt1#59, avg(cd_dep_count#20)#50 AS avg(cd_dep_count)#60, max(cd_dep_count#20)#51 AS max(cd_dep_count)#61, sum(cd_dep_count#20)#52 AS sum(cd_dep_count)#62, cd_dep_employed_count#21, count(1)#49 AS cnt2#63, avg(cd_dep_employed_count#21)#53 AS avg(cd_dep_employed_count)#64, max(cd_dep_employed_count#21)#54 AS max(cd_dep_employed_count)#65, sum(cd_dep_employed_count#21)#55 AS sum(cd_dep_employed_count)#66, cd_dep_college_count#22, count(1)#49 AS cnt3#67, avg(cd_dep_college_count#22)#56 AS avg(cd_dep_college_count)#68, max(cd_dep_college_count#22)#57 AS max(cd_dep_college_count)#69, sum(cd_dep_college_count#22)#58 AS sum(cd_dep_college_count)#70] + +(39) TakeOrderedAndProject +Input [18]: [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cnt1#59, avg(cd_dep_count)#60, max(cd_dep_count)#61, sum(cd_dep_count)#62, cd_dep_employed_count#21, cnt2#63, avg(cd_dep_employed_count)#64, max(cd_dep_employed_count)#65, sum(cd_dep_employed_count)#66, cd_dep_college_count#22, cnt3#67, avg(cd_dep_college_count)#68, max(cd_dep_college_count)#69, sum(cd_dep_college_count)#70] +Arguments: 100, [ca_state#16 ASC NULLS FIRST, cd_gender#18 ASC NULLS FIRST, cd_marital_status#19 ASC NULLS FIRST, cd_dep_count#20 ASC NULLS FIRST, cd_dep_employed_count#21 ASC NULLS FIRST, cd_dep_college_count#22 ASC NULLS FIRST], [ca_state#16, cd_gender#18, cd_marital_status#19, cd_dep_count#20, cnt1#59, avg(cd_dep_count)#60, max(cd_dep_count)#61, sum(cd_dep_count)#62, cd_dep_employed_count#21, cnt2#63, avg(cd_dep_employed_count)#64, max(cd_dep_employed_count)#65, sum(cd_dep_employed_count)#66, cd_dep_college_count#22, cnt3#67, avg(cd_dep_college_count)#68, max(cd_dep_college_count)#69, sum(cd_dep_college_count)#70] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/simplified.txt new file mode 100644 index 0000000000..7bd4e9bfca --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_datafusion/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + WholeStageCodegen (6) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] [count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [cs_ship_customer_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ad2994bb2f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/explain.txt @@ -0,0 +1,255 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (34) + : +- * BroadcastHashJoin Inner BuildRight (33) + : :- * Project (28) + : : +- * Filter (27) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (26) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (19) + : : : :- * ColumnarToRow (12) + : : : : +- CometBroadcastHashJoin (11) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (10) + : : : : +- CometProject (9) + : : : : +- CometBroadcastHashJoin (8) + : : : : :- CometScan parquet spark_catalog.default.store_sales (3) + : : : : +- CometBroadcastExchange (7) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (18) + : : : +- * ColumnarToRow (17) + : : : +- CometProject (16) + : : : +- CometBroadcastHashJoin (15) + : : : :- CometScan parquet spark_catalog.default.web_sales (13) + : : : +- ReusedExchange (14) + : : +- BroadcastExchange (25) + : : +- * ColumnarToRow (24) + : : +- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometScan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (21) + : +- BroadcastExchange (32) + : +- * ColumnarToRow (31) + : +- CometFilter (30) + : +- CometScan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (38) + +- * ColumnarToRow (37) + +- CometFilter (36) + +- CometScan parquet spark_catalog.default.customer_demographics (35) + + +(1) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(6) CometProject +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ss_customer_sk#6], [ss_customer_sk#6] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Right output [1]: [ss_customer_sk#6] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], LeftSemi, BuildRight + +(12) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(13) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(14) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#13] + +(15) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Right output [1]: [d_date_sk#13] +Arguments: [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + +(16) CometProject +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Arguments: [ws_bill_customer_sk#11], [ws_bill_customer_sk#11] + +(17) ColumnarToRow [codegen id : 1] +Input [1]: [ws_bill_customer_sk#11] + +(18) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(20) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(21) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#16] + +(22) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Right output [1]: [d_date_sk#16] +Arguments: [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + +(23) CometProject +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] +Arguments: [cs_ship_customer_sk#14], [cs_ship_customer_sk#14] + +(24) ColumnarToRow [codegen id : 2] +Input [1]: [cs_ship_customer_sk#14] + +(25) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(27) Filter [codegen id : 5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(28) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(29) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(31) ColumnarToRow [codegen id : 3] +Input [2]: [ca_address_sk#17, ca_state#18] + +(32) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 5] +Output [2]: [c_current_cdemo_sk#4, ca_state#18] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17, ca_state#18] + +(35) CometScan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(36) CometFilter +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(37) ColumnarToRow [codegen id : 4] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(38) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(39) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 5] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#4, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(41) HashAggregate [codegen id : 5] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] + +(42) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(43) HashAggregate [codegen id : 6] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, avg(cd_dep_count#22)#52, max(cd_dep_count#22)#53, sum(cd_dep_count#22)#54, avg(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, sum(cd_dep_employed_count#23)#57, avg(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, sum(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, count(1)#51 AS cnt1#61, avg(cd_dep_count#22)#52 AS avg(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, sum(cd_dep_count#22)#54 AS sum(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, avg(cd_dep_employed_count#23)#55 AS avg(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, sum(cd_dep_employed_count#23)#57 AS sum(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, avg(cd_dep_college_count#24)#58 AS avg(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, sum(cd_dep_college_count#24)#60 AS sum(cd_dep_college_count)#72] + +(44) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a6d7a0be1b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35.native_iceberg_compat/simplified.txt @@ -0,0 +1,60 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + WholeStageCodegen (6) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] [count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (5) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [ws_bill_customer_sk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometProject [cs_ship_customer_sk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/explain.txt new file mode 100644 index 0000000000..9fda42eeea --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/explain.txt @@ -0,0 +1,199 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometHashAggregate (36) + +- CometExchange (35) + +- CometHashAggregate (34) + +- CometProject (33) + +- CometBroadcastHashJoin (32) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometBroadcastHashJoin (11) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : +- CometBroadcastExchange (10) + : : : +- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (4) + : : +- CometBroadcastExchange (21) + : : +- CometUnion (20) + : : :- CometProject (15) + : : : +- CometBroadcastHashJoin (14) + : : : :- CometNativeScan: `spark_catalog`.`default`.`web_sales` (12) + : : : +- ReusedExchange (13) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (16) + : : +- ReusedExchange (17) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (24) + +- CometBroadcastExchange (31) + +- CometFilter (30) + +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (29) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Arguments: [ss_customer_sk#4, ss_sold_date_sk#5] + +(4) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Arguments: [d_date_sk#6, d_year#7, d_qoy#8] + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Condition : ((((isnotnull(d_year#7) AND isnotnull(d_qoy#8)) AND (d_year#7 = 1999)) AND (d_qoy#8 < 4)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Arguments: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(13) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [customsk#12], [ws_bill_customer_sk#9 AS customsk#12] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Arguments: [cs_ship_customer_sk#13, cs_sold_date_sk#14] + +(17) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Right output [1]: [d_date_sk#15] +Arguments: [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Arguments: [customsk#16], [cs_ship_customer_sk#13 AS customsk#16] + +(20) CometUnion +Child 0 Input [1]: [customsk#12] +Child 1 Input [1]: [customsk#16] + +(21) CometBroadcastExchange +Input [1]: [customsk#12] +Arguments: [customsk#12] + +(22) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [customsk#12] +Arguments: [c_customer_sk#1], [customsk#12], LeftSemi, BuildRight + +(23) CometProject +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_current_cdemo_sk#2, c_current_addr_sk#3], [c_current_cdemo_sk#2, c_current_addr_sk#3] + +(24) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#17, ca_state#18] +Arguments: [ca_address_sk#17, ca_state#18] + +(25) CometFilter +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(26) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: [ca_address_sk#17, ca_state#18] + +(27) CometBroadcastHashJoin +Left output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [2]: [ca_address_sk#17, ca_state#18] +Arguments: [c_current_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(28) CometProject +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17, ca_state#18] +Arguments: [c_current_cdemo_sk#2, ca_state#18], [c_current_cdemo_sk#2, ca_state#18] + +(29) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(30) CometFilter +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(31) CometBroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(32) CometBroadcastHashJoin +Left output [2]: [c_current_cdemo_sk#2, ca_state#18] +Right output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#19], Inner, BuildRight + +(33) CometProject +Input [8]: [c_current_cdemo_sk#2, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(34) CometHashAggregate +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] + +(35) CometExchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(36) CometHashAggregate +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] + +(37) CometTakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_state#18 ASC NULLS FIRST,cd_gender#20 ASC NULLS FIRST,cd_marital_status#21 ASC NULLS FIRST,cd_dep_count#22 ASC NULLS FIRST,cd_dep_employed_count#23 ASC NULLS FIRST,cd_dep_college_count#24 ASC NULLS FIRST], output=[ca_state#18,cd_gender#20,cd_marital_status#21,cd_dep_count#22,cnt1#38,avg(cd_dep_count)#39,max(cd_dep_count)#40,sum(cd_dep_count)#41,cd_dep_employed_count#23,cnt2#42,avg(cd_dep_employed_count)#43,max(cd_dep_employed_count)#44,sum(cd_dep_employed_count)#45,cd_dep_college_count#24,cnt3#46,avg(cd_dep_college_count)#47,max(cd_dep_college_count)#48,sum(cd_dep_college_count)#49]), [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49], 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49] + +(38) ColumnarToRow [codegen id : 1] +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b1134d0f8d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cd_dep_employed_count,cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cd_dep_college_count,cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + CometHashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cd_dep_employed_count,cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cd_dep_college_count,cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + CometExchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + CometHashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + CometProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometBroadcastHashJoin [c_current_cdemo_sk,ca_state,cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometProject [c_current_cdemo_sk,ca_state] + CometBroadcastHashJoin [c_current_cdemo_sk,c_current_addr_sk,ca_address_sk,ca_state] + CometProject [c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,customsk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [customsk] #4 + CometUnion [customsk] + CometProject [ws_bill_customer_sk] [customsk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometProject [cs_ship_customer_sk] [customsk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [ca_address_sk,ca_state] #5 + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #6 + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f047d46b7a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/explain.txt @@ -0,0 +1,220 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometHashAggregate (36) + +- CometExchange (35) + +- CometHashAggregate (34) + +- CometProject (33) + +- CometBroadcastHashJoin (32) + :- CometProject (28) + : +- CometBroadcastHashJoin (27) + : :- CometProject (23) + : : +- CometBroadcastHashJoin (22) + : : :- CometBroadcastHashJoin (11) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : +- CometBroadcastExchange (10) + : : : +- CometProject (9) + : : : +- CometBroadcastHashJoin (8) + : : : :- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (7) + : : : +- CometProject (6) + : : : +- CometFilter (5) + : : : +- CometScan parquet spark_catalog.default.date_dim (4) + : : +- CometBroadcastExchange (21) + : : +- CometUnion (20) + : : :- CometProject (15) + : : : +- CometBroadcastHashJoin (14) + : : : :- CometScan parquet spark_catalog.default.web_sales (12) + : : : +- ReusedExchange (13) + : : +- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometScan parquet spark_catalog.default.catalog_sales (16) + : : +- ReusedExchange (17) + : +- CometBroadcastExchange (26) + : +- CometFilter (25) + : +- CometScan parquet spark_catalog.default.customer_address (24) + +- CometBroadcastExchange (31) + +- CometFilter (30) + +- CometScan parquet spark_catalog.default.customer_demographics (29) + + +(1) CometScan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(4) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Condition : ((((isnotnull(d_year#7) AND isnotnull(d_qoy#8)) AND (d_year#7 = 1999)) AND (d_qoy#8 < 4)) AND isnotnull(d_date_sk#6)) + +(6) CometProject +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(7) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(8) CometBroadcastHashJoin +Left output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(9) CometProject +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_customer_sk#4], [ss_customer_sk#4] + +(10) CometBroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4] + +(11) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [ss_customer_sk#4] +Arguments: [c_customer_sk#1], [ss_customer_sk#4], LeftSemi, BuildRight + +(12) CometScan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(13) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#11] + +(14) CometBroadcastHashJoin +Left output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Right output [1]: [d_date_sk#11] +Arguments: [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + +(15) CometProject +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Arguments: [customsk#12], [ws_bill_customer_sk#9 AS customsk#12] + +(16) CometScan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#14)] +ReadSchema: struct + +(17) ReusedExchange [Reuses operator id: 7] +Output [1]: [d_date_sk#15] + +(18) CometBroadcastHashJoin +Left output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Right output [1]: [d_date_sk#15] +Arguments: [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + +(19) CometProject +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Arguments: [customsk#16], [cs_ship_customer_sk#13 AS customsk#16] + +(20) CometUnion +Child 0 Input [1]: [customsk#12] +Child 1 Input [1]: [customsk#16] + +(21) CometBroadcastExchange +Input [1]: [customsk#12] +Arguments: [customsk#12] + +(22) CometBroadcastHashJoin +Left output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [1]: [customsk#12] +Arguments: [c_customer_sk#1], [customsk#12], LeftSemi, BuildRight + +(23) CometProject +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Arguments: [c_current_cdemo_sk#2, c_current_addr_sk#3], [c_current_cdemo_sk#2, c_current_addr_sk#3] + +(24) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) CometFilter +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(26) CometBroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: [ca_address_sk#17, ca_state#18] + +(27) CometBroadcastHashJoin +Left output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Right output [2]: [ca_address_sk#17, ca_state#18] +Arguments: [c_current_addr_sk#3], [ca_address_sk#17], Inner, BuildRight + +(28) CometProject +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17, ca_state#18] +Arguments: [c_current_cdemo_sk#2, ca_state#18], [c_current_cdemo_sk#2, ca_state#18] + +(29) CometScan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(30) CometFilter +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(31) CometBroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(32) CometBroadcastHashJoin +Left output [2]: [c_current_cdemo_sk#2, ca_state#18] +Right output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [c_current_cdemo_sk#2], [cd_demo_sk#19], Inner, BuildRight + +(33) CometProject +Input [8]: [c_current_cdemo_sk#2, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(34) CometHashAggregate +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] + +(35) CometExchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(36) CometHashAggregate +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] + +(37) CometTakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[ca_state#18 ASC NULLS FIRST,cd_gender#20 ASC NULLS FIRST,cd_marital_status#21 ASC NULLS FIRST,cd_dep_count#22 ASC NULLS FIRST,cd_dep_employed_count#23 ASC NULLS FIRST,cd_dep_college_count#24 ASC NULLS FIRST], output=[ca_state#18,cd_gender#20,cd_marital_status#21,cd_dep_count#22,cnt1#38,avg(cd_dep_count)#39,max(cd_dep_count)#40,sum(cd_dep_count)#41,cd_dep_employed_count#23,cnt2#42,avg(cd_dep_employed_count)#43,max(cd_dep_employed_count)#44,sum(cd_dep_employed_count)#45,cd_dep_college_count#24,cnt3#46,avg(cd_dep_college_count)#47,max(cd_dep_college_count)#48,sum(cd_dep_college_count)#49]), [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49], 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49] + +(38) ColumnarToRow [codegen id : 1] +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#38, avg(cd_dep_count)#39, max(cd_dep_count)#40, sum(cd_dep_count)#41, cd_dep_employed_count#23, cnt2#42, avg(cd_dep_employed_count)#43, max(cd_dep_employed_count)#44, sum(cd_dep_employed_count)#45, cd_dep_college_count#24, cnt3#46, avg(cd_dep_college_count)#47, max(cd_dep_college_count)#48, sum(cd_dep_college_count)#49] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c84f0a6481 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q35a.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cd_dep_employed_count,cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cd_dep_college_count,cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + CometHashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cd_dep_employed_count,cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cd_dep_college_count,cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + CometExchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + CometHashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + CometProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometBroadcastHashJoin [c_current_cdemo_sk,ca_state,cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometProject [c_current_cdemo_sk,ca_state] + CometBroadcastHashJoin [c_current_cdemo_sk,c_current_addr_sk,ca_address_sk,ca_state] + CometProject [c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,customsk] + CometBroadcastHashJoin [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,ss_customer_sk] + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + CometBroadcastExchange [ss_customer_sk] #2 + CometProject [ss_customer_sk] + CometBroadcastHashJoin [ss_customer_sk,ss_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #3 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + CometBroadcastExchange [customsk] #4 + CometUnion [customsk] + CometProject [ws_bill_customer_sk] [customsk] + CometBroadcastHashJoin [ws_bill_customer_sk,ws_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometProject [cs_ship_customer_sk] [customsk] + CometBroadcastHashJoin [cs_ship_customer_sk,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + ReusedExchange [d_date_sk] #3 + CometBroadcastExchange [ca_address_sk,ca_state] #5 + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #6 + CometFilter [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/explain.txt new file mode 100644 index 0000000000..5f6d368556 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/explain.txt @@ -0,0 +1,224 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- Window (40) + +- * ColumnarToRow (39) + +- CometSort (38) + +- CometExchange (37) + +- CometHashAggregate (36) + +- CometExchange (35) + +- CometHashAggregate (34) + +- CometUnion (33) + :- CometHashAggregate (22) + : +- CometExchange (21) + : +- CometHashAggregate (20) + : +- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + : +- CometBroadcastExchange (17) + : +- CometProject (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometHashAggregate (24) + : +- ReusedExchange (23) + +- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometHashAggregate (29) + +- ReusedExchange (28) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6, d_year#7] + +(4) CometFilter +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4], [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(10) CometFilter +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(12) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Right output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_item_sk#1], [i_item_sk#8], Inner, BuildRight + +(13) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11, s_state#12] + +(15) CometFilter +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(16) CometProject +Input [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(18) CometBroadcastHashJoin +Left output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(19) CometProject +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] +Arguments: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(20) CometHashAggregate +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(21) CometExchange +Input [4]: [i_category#10, i_class#9, sum#13, sum#14] +Arguments: hashpartitioning(i_category#10, i_class#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [4]: [i_category#10, i_class#9, sum#13, sum#14] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] + +(23) ReusedExchange [Reuses operator id: 21] +Output [4]: [i_category#15, i_class#16, sum#17, sum#18] + +(24) CometHashAggregate +Input [4]: [i_category#15, i_class#16, sum#17, sum#18] +Keys [2]: [i_category#15, i_class#16] +Functions [2]: [sum(UnscaledValue(ss_net_profit#19)), sum(UnscaledValue(ss_ext_sales_price#20))] + +(25) CometHashAggregate +Input [3]: [ss_net_profit#21, ss_ext_sales_price#22, i_category#15] +Keys [1]: [i_category#15] +Functions [2]: [partial_sum(ss_net_profit#21), partial_sum(ss_ext_sales_price#22)] + +(26) CometExchange +Input [5]: [i_category#15, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(i_category#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(27) CometHashAggregate +Input [5]: [i_category#15, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [i_category#15] +Functions [2]: [sum(ss_net_profit#21), sum(ss_ext_sales_price#22)] + +(28) ReusedExchange [Reuses operator id: 21] +Output [4]: [i_category#27, i_class#28, sum#29, sum#30] + +(29) CometHashAggregate +Input [4]: [i_category#27, i_class#28, sum#29, sum#30] +Keys [2]: [i_category#27, i_class#28] +Functions [2]: [sum(UnscaledValue(ss_net_profit#31)), sum(UnscaledValue(ss_ext_sales_price#32))] + +(30) CometHashAggregate +Input [2]: [ss_net_profit#33, ss_ext_sales_price#34] +Keys: [] +Functions [2]: [partial_sum(ss_net_profit#33), partial_sum(ss_ext_sales_price#34)] + +(31) CometExchange +Input [4]: [sum#35, isEmpty#36, sum#37, isEmpty#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometHashAggregate +Input [4]: [sum#35, isEmpty#36, sum#37, isEmpty#38] +Keys: [] +Functions [2]: [sum(ss_net_profit#33), sum(ss_ext_sales_price#34)] + +(33) CometUnion +Child 0 Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Child 1 Input [6]: [gross_margin#43, i_category#15, i_class#44, t_category#45, t_class#46, lochierarchy#47] +Child 2 Input [6]: [gross_margin#48, i_category#49, i_class#50, t_category#51, t_class#52, lochierarchy#53] + +(34) CometHashAggregate +Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Keys [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Functions: [] + +(35) CometExchange +Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Arguments: hashpartitioning(gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(36) CometHashAggregate +Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Keys [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Functions: [] + +(37) CometExchange +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] +Arguments: hashpartitioning(lochierarchy#42, _w0#54, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(38) CometSort +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] +Arguments: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54], [lochierarchy#42 ASC NULLS FIRST, _w0#54 ASC NULLS FIRST, gross_margin#39 ASC NULLS FIRST] + +(39) ColumnarToRow [codegen id : 1] +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] + +(40) Window +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] +Arguments: [rank(gross_margin#39) windowspecdefinition(lochierarchy#42, _w0#54, gross_margin#39 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#55], [lochierarchy#42, _w0#54], [gross_margin#39 ASC NULLS FIRST] + +(41) Project [codegen id : 2] +Output [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, rank_within_parent#55] +Input [6]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54, rank_within_parent#55] + +(42) TakeOrderedAndProject +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, rank_within_parent#55] +Arguments: 100, [lochierarchy#42 DESC NULLS LAST, CASE WHEN (lochierarchy#42 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#55 ASC NULLS FIRST], [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, rank_within_parent#55] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..f0d5567dc4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_datafusion/simplified.txt @@ -0,0 +1,46 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (2) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [gross_margin,lochierarchy,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [gross_margin,i_category,i_class,lochierarchy,_w0] + CometExchange [lochierarchy,_w0] #1 + CometHashAggregate [gross_margin,i_category,i_class,lochierarchy,_w0,t_category,t_class] + CometExchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + CometUnion [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_category,i_class] #3 + CometHashAggregate [i_category,i_class,sum,sum,ss_net_profit,ss_ext_sales_price] + CometProject [ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,i_item_sk,i_class,i_category] + CometProject [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_class,i_category] #5 + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] + CometBroadcastExchange [s_store_sk] #6 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty,sum(ss_net_profit),sum(ss_ext_sales_price)] + CometExchange [i_category] #7 + CometHashAggregate [i_category,sum,isEmpty,sum,isEmpty,ss_net_profit,ss_ext_sales_price] + CometHashAggregate [ss_net_profit,ss_ext_sales_price,i_category,i_class,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + ReusedExchange [i_category,i_class,sum,sum] #3 + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty,sum(ss_net_profit),sum(ss_ext_sales_price)] + CometExchange #8 + CometHashAggregate [sum,isEmpty,sum,isEmpty,ss_net_profit,ss_ext_sales_price] + CometHashAggregate [ss_net_profit,ss_ext_sales_price,i_category,i_class,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d2e02e4c3e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/explain.txt @@ -0,0 +1,237 @@ +== Physical Plan == +TakeOrderedAndProject (42) ++- * Project (41) + +- Window (40) + +- * ColumnarToRow (39) + +- CometSort (38) + +- CometExchange (37) + +- CometHashAggregate (36) + +- CometExchange (35) + +- CometHashAggregate (34) + +- CometUnion (33) + :- CometHashAggregate (22) + : +- CometExchange (21) + : +- CometHashAggregate (20) + : +- CometProject (19) + : +- CometBroadcastHashJoin (18) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.item (9) + : +- CometBroadcastExchange (17) + : +- CometProject (16) + : +- CometFilter (15) + : +- CometScan parquet spark_catalog.default.store (14) + :- CometHashAggregate (27) + : +- CometExchange (26) + : +- CometHashAggregate (25) + : +- CometHashAggregate (24) + : +- ReusedExchange (23) + +- CometHashAggregate (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometHashAggregate (29) + +- ReusedExchange (28) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [2]: [d_date_sk#6, d_year#7] +Arguments: [d_date_sk#6], [d_date_sk#6] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: [d_date_sk#6] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Right output [1]: [d_date_sk#6] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4], [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(9) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [i_item_sk#8, i_class#9, i_category#10] + +(12) CometBroadcastHashJoin +Left output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Right output [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_item_sk#1], [i_item_sk#8], Inner, BuildRight + +(13) CometProject +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] +Arguments: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(14) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_state#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(16) CometProject +Input [2]: [s_store_sk#11, s_state#12] +Arguments: [s_store_sk#11], [s_store_sk#11] + +(17) CometBroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: [s_store_sk#11] + +(18) CometBroadcastHashJoin +Left output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Right output [1]: [s_store_sk#11] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(19) CometProject +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] +Arguments: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10], [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] + +(20) CometHashAggregate +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] + +(21) CometExchange +Input [4]: [i_category#10, i_class#9, sum#13, sum#14] +Arguments: hashpartitioning(i_category#10, i_class#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(22) CometHashAggregate +Input [4]: [i_category#10, i_class#9, sum#13, sum#14] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] + +(23) ReusedExchange [Reuses operator id: 21] +Output [4]: [i_category#15, i_class#16, sum#17, sum#18] + +(24) CometHashAggregate +Input [4]: [i_category#15, i_class#16, sum#17, sum#18] +Keys [2]: [i_category#15, i_class#16] +Functions [2]: [sum(UnscaledValue(ss_net_profit#19)), sum(UnscaledValue(ss_ext_sales_price#20))] + +(25) CometHashAggregate +Input [3]: [ss_net_profit#21, ss_ext_sales_price#22, i_category#15] +Keys [1]: [i_category#15] +Functions [2]: [partial_sum(ss_net_profit#21), partial_sum(ss_ext_sales_price#22)] + +(26) CometExchange +Input [5]: [i_category#15, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(i_category#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(27) CometHashAggregate +Input [5]: [i_category#15, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [i_category#15] +Functions [2]: [sum(ss_net_profit#21), sum(ss_ext_sales_price#22)] + +(28) ReusedExchange [Reuses operator id: 21] +Output [4]: [i_category#27, i_class#28, sum#29, sum#30] + +(29) CometHashAggregate +Input [4]: [i_category#27, i_class#28, sum#29, sum#30] +Keys [2]: [i_category#27, i_class#28] +Functions [2]: [sum(UnscaledValue(ss_net_profit#31)), sum(UnscaledValue(ss_ext_sales_price#32))] + +(30) CometHashAggregate +Input [2]: [ss_net_profit#33, ss_ext_sales_price#34] +Keys: [] +Functions [2]: [partial_sum(ss_net_profit#33), partial_sum(ss_ext_sales_price#34)] + +(31) CometExchange +Input [4]: [sum#35, isEmpty#36, sum#37, isEmpty#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometHashAggregate +Input [4]: [sum#35, isEmpty#36, sum#37, isEmpty#38] +Keys: [] +Functions [2]: [sum(ss_net_profit#33), sum(ss_ext_sales_price#34)] + +(33) CometUnion +Child 0 Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Child 1 Input [6]: [gross_margin#43, i_category#15, i_class#44, t_category#45, t_class#46, lochierarchy#47] +Child 2 Input [6]: [gross_margin#48, i_category#49, i_class#50, t_category#51, t_class#52, lochierarchy#53] + +(34) CometHashAggregate +Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Keys [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Functions: [] + +(35) CometExchange +Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Arguments: hashpartitioning(gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(36) CometHashAggregate +Input [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Keys [6]: [gross_margin#39, i_category#10, i_class#9, t_category#40, t_class#41, lochierarchy#42] +Functions: [] + +(37) CometExchange +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] +Arguments: hashpartitioning(lochierarchy#42, _w0#54, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(38) CometSort +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] +Arguments: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54], [lochierarchy#42 ASC NULLS FIRST, _w0#54 ASC NULLS FIRST, gross_margin#39 ASC NULLS FIRST] + +(39) ColumnarToRow [codegen id : 1] +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] + +(40) Window +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54] +Arguments: [rank(gross_margin#39) windowspecdefinition(lochierarchy#42, _w0#54, gross_margin#39 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#55], [lochierarchy#42, _w0#54], [gross_margin#39 ASC NULLS FIRST] + +(41) Project [codegen id : 2] +Output [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, rank_within_parent#55] +Input [6]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, _w0#54, rank_within_parent#55] + +(42) TakeOrderedAndProject +Input [5]: [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, rank_within_parent#55] +Arguments: 100, [lochierarchy#42 DESC NULLS LAST, CASE WHEN (lochierarchy#42 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#55 ASC NULLS FIRST], [gross_margin#39, i_category#10, i_class#9, lochierarchy#42, rank_within_parent#55] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..d92039020f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q36a.native_iceberg_compat/simplified.txt @@ -0,0 +1,46 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (2) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [gross_margin,lochierarchy,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [gross_margin,i_category,i_class,lochierarchy,_w0] + CometExchange [lochierarchy,_w0] #1 + CometHashAggregate [gross_margin,i_category,i_class,lochierarchy,_w0,t_category,t_class] + CometExchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + CometUnion [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_category,i_class] #3 + CometHashAggregate [i_category,i_class,sum,sum,ss_net_profit,ss_ext_sales_price] + CometProject [ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,i_item_sk,i_class,i_category] + CometProject [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [i_item_sk,i_class,i_category] #5 + CometFilter [i_item_sk,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + CometBroadcastExchange [s_store_sk] #6 + CometProject [s_store_sk] + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty,sum(ss_net_profit),sum(ss_ext_sales_price)] + CometExchange [i_category] #7 + CometHashAggregate [i_category,sum,isEmpty,sum,isEmpty,ss_net_profit,ss_ext_sales_price] + CometHashAggregate [ss_net_profit,ss_ext_sales_price,i_category,i_class,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + ReusedExchange [i_category,i_class,sum,sum] #3 + CometHashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty,sum(ss_net_profit),sum(ss_ext_sales_price)] + CometExchange #8 + CometHashAggregate [sum,isEmpty,sum,isEmpty,ss_net_profit,ss_ext_sales_price] + CometHashAggregate [ss_net_profit,ss_ext_sales_price,i_category,i_class,sum,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/explain.txt new file mode 100644 index 0000000000..37d9b5e7a8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Arguments: [i_item_sk#1, i_brand#2, i_category#3] + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_item_sk#1], [ss_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(14) CometFilter +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(15) CometBroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Right output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [ss_store_sk#5], [s_store_sk#11], Inner, BuildRight + +(17) CometProject +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13], [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] + +(18) CometHashAggregate +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] + +(21) CometExchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] + +(24) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#17], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Arguments: [avg(_w0#16) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#18], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(27) Filter [codegen id : 7] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] +Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.000000)) AND CASE WHEN (avg_monthly_sales#18 > 0.000000) THEN ((abs((sum_sales#15 - avg_monthly_sales#18)) / avg_monthly_sales#18) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] + +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] + +(30) CometHashAggregate +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] +Keys [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(ss_sales_price#26))] + +(31) CometExchange +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] + +(34) Window +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#15 AS sum_sales#28, rn#27] +Input [8]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15, rn#27] + +(36) BroadcastExchange +Input [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] + +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(40) CometSort +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15], [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(42) Window +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#29, i_brand#30, s_store_name#31, s_company_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#15 AS sum_sales#36, rn#35] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15, rn#35] + +(44) BroadcastExchange +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#35 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, sum_sales#28 AS psum#37, sum_sales#36 AS nsum#38] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28, i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] + +(47) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/simplified.txt new file mode 100644 index 0000000000..196a6d2c7d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum,nsum] + WholeStageCodegen (7) + Project [i_category,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #1 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum,ss_sales_price] + CometProject [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #7 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..dbb08b552a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.item (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.store (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(5) CometBroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_item_sk#1], [ss_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] + +(13) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(15) CometBroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Right output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [ss_store_sk#5], [s_store_sk#11], Inner, BuildRight + +(17) CometProject +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13], [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] + +(18) CometHashAggregate +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#14] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] + +(21) CometExchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] + +(24) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#17], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17] +Arguments: [avg(_w0#16) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#18], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(27) Filter [codegen id : 7] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] +Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.000000)) AND CASE WHEN (avg_monthly_sales#18 > 0.000000) THEN ((abs((sum_sales#15 - avg_monthly_sales#18)) / avg_monthly_sales#18) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] + +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] + +(30) CometHashAggregate +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum#25] +Keys [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(ss_sales_price#26))] + +(31) CometExchange +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] + +(34) Window +Input [7]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#15 AS sum_sales#28, rn#27] +Input [8]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#23, d_moy#24, sum_sales#15, rn#27] + +(36) BroadcastExchange +Input [6]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#28, rn#27] + +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(40) CometSort +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15], [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST, s_company_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] + +(42) Window +Input [7]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#29, i_brand#30, s_store_name#31, s_company_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#15 AS sum_sales#36, rn#35] +Input [8]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, d_year#33, d_moy#34, sum_sales#15, rn#35] + +(44) BroadcastExchange +Input [6]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#17] +Right keys [5]: [i_category#29, i_brand#30, s_store_name#31, s_company_name#32, (rn#35 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, sum_sales#28 AS psum#37, sum_sales#36 AS nsum#38] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#28, i_category#29, i_brand#30, s_store_name#31, s_company_name#32, sum_sales#36, rn#35] + +(47) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#18, sum_sales#15, psum#37, nsum#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9dbc07f8fa --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum,nsum] + WholeStageCodegen (7) + Project [i_category,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #1 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum,ss_sales_price] + CometProject [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy,s_store_sk,s_store_name,s_company_name] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + CometBroadcastExchange [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [s_store_sk,s_store_name,s_company_name] #5 + CometFilter [s_store_sk,s_store_name,s_company_name] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,s_store_name,s_company_name] #7 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/explain.txt new file mode 100644 index 0000000000..ef09c3f299 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/explain.txt @@ -0,0 +1,241 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- Union (43) + :- * Project (26) + : +- * Filter (25) + : +- Window (24) + : +- * Sort (23) + : +- Window (22) + : +- * ColumnarToRow (21) + : +- CometSort (20) + : +- CometExchange (19) + : +- CometHashAggregate (18) + : +- CometExchange (17) + : +- CometHashAggregate (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometBroadcastExchange (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (5) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (10) + :- * Project (34) + : +- * Filter (33) + : +- Window (32) + : +- * Sort (31) + : +- Window (30) + : +- * ColumnarToRow (29) + : +- CometSort (28) + : +- ReusedExchange (27) + +- * Project (42) + +- * Filter (41) + +- Window (40) + +- * Sort (39) + +- Window (38) + +- * ColumnarToRow (37) + +- CometSort (36) + +- ReusedExchange (35) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(2) CometFilter +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(3) CometProject +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6], [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(4) CometBroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(5) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(6) CometFilter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(7) CometProject +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10], [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(8) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Right output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_order_number#2, ws_item_sk#1], [wr_order_number#8, wr_item_sk#7], Inner, BuildLeft + +(9) CometProject +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] + +(10) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12, d_year#13, d_moy#14] + +(11) CometFilter +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(12) CometProject +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(14) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Right output [1]: [d_date_sk#12] +Arguments: [ws_sold_date_sk#6], [d_date_sk#12], Inner, BuildRight + +(15) CometProject +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] + +(16) CometHashAggregate +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(17) CometExchange +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(19) CometExchange +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [item#21, return_ratio#22, currency_ratio#23], [return_ratio#22 ASC NULLS FIRST] + +(21) ColumnarToRow [codegen id : 1] +Input [3]: [item#21, return_ratio#22, currency_ratio#23] + +(22) Window +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#24], [return_ratio#22 ASC NULLS FIRST] + +(23) Sort [codegen id : 2] +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [currency_ratio#23 ASC NULLS FIRST], false, 0 + +(24) Window +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [rank(currency_ratio#23) windowspecdefinition(currency_ratio#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#25], [currency_ratio#23 ASC NULLS FIRST] + +(25) Filter [codegen id : 3] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] +Condition : ((return_rank#24 <= 10) OR (currency_rank#25 <= 10)) + +(26) Project [codegen id : 3] +Output [5]: [web AS channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] + +(27) ReusedExchange [Reuses operator id: 19] +Output [3]: [item#27, return_ratio#28, currency_ratio#29] + +(28) CometSort +Input [3]: [item#27, return_ratio#28, currency_ratio#29] +Arguments: [item#27, return_ratio#28, currency_ratio#29], [return_ratio#28 ASC NULLS FIRST] + +(29) ColumnarToRow [codegen id : 4] +Input [3]: [item#27, return_ratio#28, currency_ratio#29] + +(30) Window +Input [3]: [item#27, return_ratio#28, currency_ratio#29] +Arguments: [rank(return_ratio#28) windowspecdefinition(return_ratio#28 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#30], [return_ratio#28 ASC NULLS FIRST] + +(31) Sort [codegen id : 5] +Input [4]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30] +Arguments: [currency_ratio#29 ASC NULLS FIRST], false, 0 + +(32) Window +Input [4]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30] +Arguments: [rank(currency_ratio#29) windowspecdefinition(currency_ratio#29 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#31], [currency_ratio#29 ASC NULLS FIRST] + +(33) Filter [codegen id : 6] +Input [5]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30, currency_rank#31] +Condition : ((return_rank#30 <= 10) OR (currency_rank#31 <= 10)) + +(34) Project [codegen id : 6] +Output [5]: [catalog AS channel#32, item#27, return_ratio#28, return_rank#30, currency_rank#31] +Input [5]: [item#27, return_ratio#28, currency_ratio#29, return_rank#30, currency_rank#31] + +(35) ReusedExchange [Reuses operator id: 19] +Output [3]: [item#33, return_ratio#34, currency_ratio#35] + +(36) CometSort +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [item#33, return_ratio#34, currency_ratio#35], [return_ratio#34 ASC NULLS FIRST] + +(37) ColumnarToRow [codegen id : 7] +Input [3]: [item#33, return_ratio#34, currency_ratio#35] + +(38) Window +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#36], [return_ratio#34 ASC NULLS FIRST] + +(39) Sort [codegen id : 8] +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36] +Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 + +(40) Window +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36] +Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#37], [currency_ratio#35 ASC NULLS FIRST] + +(41) Filter [codegen id : 9] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36, currency_rank#37] +Condition : ((return_rank#36 <= 10) OR (currency_rank#37 <= 10)) + +(42) Project [codegen id : 9] +Output [5]: [store AS channel#38, item#33, return_ratio#34, return_rank#36, currency_rank#37] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#36, currency_rank#37] + +(43) Union + +(44) HashAggregate [codegen id : 10] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(45) Exchange +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: hashpartitioning(channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(46) HashAggregate [codegen id : 11] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(47) TakeOrderedAndProject +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: 100, [channel#26 ASC NULLS FIRST, return_rank#24 ASC NULLS FIRST, currency_rank#25 ASC NULLS FIRST, item#21 ASC NULLS FIRST], [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/simplified.txt new file mode 100644 index 0000000000..baf07c5c9a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_datafusion/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (11) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (10) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (3) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (2) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #2 + CometHashAggregate [item,return_ratio,currency_ratio,ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + CometExchange [ws_item_sk] #3 + CometHashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt,d_date_sk] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometBroadcastExchange [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] #4 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + WholeStageCodegen (6) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (5) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + ReusedExchange [item,return_ratio,currency_ratio] #2 + WholeStageCodegen (9) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (8) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + ReusedExchange [item,return_ratio,currency_ratio] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ddfe4a99f5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- * HashAggregate (76) + +- Exchange (75) + +- * HashAggregate (74) + +- Union (73) + :- * Project (26) + : +- * Filter (25) + : +- Window (24) + : +- * Sort (23) + : +- Window (22) + : +- * ColumnarToRow (21) + : +- CometSort (20) + : +- CometExchange (19) + : +- CometHashAggregate (18) + : +- CometExchange (17) + : +- CometHashAggregate (16) + : +- CometProject (15) + : +- CometBroadcastHashJoin (14) + : :- CometProject (9) + : : +- CometBroadcastHashJoin (8) + : : :- CometBroadcastExchange (4) + : : : +- CometProject (3) + : : : +- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : +- CometProject (7) + : : +- CometFilter (6) + : : +- CometScan parquet spark_catalog.default.web_returns (5) + : +- CometBroadcastExchange (13) + : +- CometProject (12) + : +- CometFilter (11) + : +- CometScan parquet spark_catalog.default.date_dim (10) + :- * Project (49) + : +- * Filter (48) + : +- Window (47) + : +- * Sort (46) + : +- Window (45) + : +- * ColumnarToRow (44) + : +- CometSort (43) + : +- CometExchange (42) + : +- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometProject (38) + : +- CometBroadcastHashJoin (37) + : :- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometBroadcastExchange (30) + : : : +- CometProject (29) + : : : +- CometFilter (28) + : : : +- CometScan parquet spark_catalog.default.catalog_sales (27) + : : +- CometProject (33) + : : +- CometFilter (32) + : : +- CometScan parquet spark_catalog.default.catalog_returns (31) + : +- ReusedExchange (36) + +- * Project (72) + +- * Filter (71) + +- Window (70) + +- * Sort (69) + +- Window (68) + +- * ColumnarToRow (67) + +- CometSort (66) + +- CometExchange (65) + +- CometHashAggregate (64) + +- CometExchange (63) + +- CometHashAggregate (62) + +- CometProject (61) + +- CometBroadcastHashJoin (60) + :- CometProject (58) + : +- CometBroadcastHashJoin (57) + : :- CometBroadcastExchange (53) + : : +- CometProject (52) + : : +- CometFilter (51) + : : +- CometScan parquet spark_catalog.default.store_sales (50) + : +- CometProject (56) + : +- CometFilter (55) + : +- CometScan parquet spark_catalog.default.store_returns (54) + +- ReusedExchange (59) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#6)] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(3) CometProject +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6], [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(4) CometBroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] + +(5) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(7) CometProject +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Arguments: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10], [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(8) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Right output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_order_number#2, ws_item_sk#1], [wr_order_number#8, wr_item_sk#7], Inner, BuildLeft + +(9) CometProject +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] + +(10) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) CometFilter +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(12) CometProject +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Arguments: [d_date_sk#12], [d_date_sk#12] + +(13) CometBroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: [d_date_sk#12] + +(14) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Right output [1]: [d_date_sk#12] +Arguments: [ws_sold_date_sk#6], [d_date_sk#12], Inner, BuildRight + +(15) CometProject +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] +Arguments: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10], [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] + +(16) CometHashAggregate +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(17) CometExchange +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(18) CometHashAggregate +Input [7]: [ws_item_sk#1, sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] + +(19) CometExchange +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [item#21, return_ratio#22, currency_ratio#23], [return_ratio#22 ASC NULLS FIRST] + +(21) ColumnarToRow [codegen id : 1] +Input [3]: [item#21, return_ratio#22, currency_ratio#23] + +(22) Window +Input [3]: [item#21, return_ratio#22, currency_ratio#23] +Arguments: [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#24], [return_ratio#22 ASC NULLS FIRST] + +(23) Sort [codegen id : 2] +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [currency_ratio#23 ASC NULLS FIRST], false, 0 + +(24) Window +Input [4]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24] +Arguments: [rank(currency_ratio#23) windowspecdefinition(currency_ratio#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#25], [currency_ratio#23 ASC NULLS FIRST] + +(25) Filter [codegen id : 3] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] +Condition : ((return_rank#24 <= 10) OR (currency_rank#25 <= 10)) + +(26) Project [codegen id : 3] +Output [5]: [web AS channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Input [5]: [item#21, return_ratio#22, currency_ratio#23, return_rank#24, currency_rank#25] + +(27) CometScan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_net_profit#31, cs_sold_date_sk#32] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#32)] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(28) CometFilter +Input [6]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_net_profit#31, cs_sold_date_sk#32] +Condition : (((((((isnotnull(cs_net_profit#31) AND isnotnull(cs_net_paid#30)) AND isnotnull(cs_quantity#29)) AND (cs_net_profit#31 > 1.00)) AND (cs_net_paid#30 > 0.00)) AND (cs_quantity#29 > 0)) AND isnotnull(cs_order_number#28)) AND isnotnull(cs_item_sk#27)) + +(29) CometProject +Input [6]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_net_profit#31, cs_sold_date_sk#32] +Arguments: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32], [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] + +(30) CometBroadcastExchange +Input [5]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] +Arguments: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] + +(31) CometScan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36, cr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(32) CometFilter +Input [5]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36, cr_returned_date_sk#37] +Condition : (((isnotnull(cr_return_amount#36) AND (cr_return_amount#36 > 10000.00)) AND isnotnull(cr_order_number#34)) AND isnotnull(cr_item_sk#33)) + +(33) CometProject +Input [5]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36, cr_returned_date_sk#37] +Arguments: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36], [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36] + +(34) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32] +Right output [4]: [cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36] +Arguments: [cs_order_number#28, cs_item_sk#27], [cr_order_number#34, cr_item_sk#33], Inner, BuildLeft + +(35) CometProject +Input [9]: [cs_item_sk#27, cs_order_number#28, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_item_sk#33, cr_order_number#34, cr_return_quantity#35, cr_return_amount#36] +Arguments: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36], [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36] + +(36) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#38] + +(37) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36] +Right output [1]: [d_date_sk#38] +Arguments: [cs_sold_date_sk#32], [d_date_sk#38], Inner, BuildRight + +(38) CometProject +Input [7]: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cs_sold_date_sk#32, cr_return_quantity#35, cr_return_amount#36, d_date_sk#38] +Arguments: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cr_return_quantity#35, cr_return_amount#36], [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cr_return_quantity#35, cr_return_amount#36] + +(39) CometHashAggregate +Input [5]: [cs_item_sk#27, cs_quantity#29, cs_net_paid#30, cr_return_quantity#35, cr_return_amount#36] +Keys [1]: [cs_item_sk#27] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#35, 0)), partial_sum(coalesce(cs_quantity#29, 0)), partial_sum(coalesce(cast(cr_return_amount#36 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))] + +(40) CometExchange +Input [7]: [cs_item_sk#27, sum#39, sum#40, sum#41, isEmpty#42, sum#43, isEmpty#44] +Arguments: hashpartitioning(cs_item_sk#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(41) CometHashAggregate +Input [7]: [cs_item_sk#27, sum#39, sum#40, sum#41, isEmpty#42, sum#43, isEmpty#44] +Keys [1]: [cs_item_sk#27] +Functions [4]: [sum(coalesce(cr_return_quantity#35, 0)), sum(coalesce(cs_quantity#29, 0)), sum(coalesce(cast(cr_return_amount#36 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))] + +(42) CometExchange +Input [3]: [item#45, return_ratio#46, currency_ratio#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(43) CometSort +Input [3]: [item#45, return_ratio#46, currency_ratio#47] +Arguments: [item#45, return_ratio#46, currency_ratio#47], [return_ratio#46 ASC NULLS FIRST] + +(44) ColumnarToRow [codegen id : 4] +Input [3]: [item#45, return_ratio#46, currency_ratio#47] + +(45) Window +Input [3]: [item#45, return_ratio#46, currency_ratio#47] +Arguments: [rank(return_ratio#46) windowspecdefinition(return_ratio#46 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#48], [return_ratio#46 ASC NULLS FIRST] + +(46) Sort [codegen id : 5] +Input [4]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48] +Arguments: [currency_ratio#47 ASC NULLS FIRST], false, 0 + +(47) Window +Input [4]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48] +Arguments: [rank(currency_ratio#47) windowspecdefinition(currency_ratio#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#49], [currency_ratio#47 ASC NULLS FIRST] + +(48) Filter [codegen id : 6] +Input [5]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48, currency_rank#49] +Condition : ((return_rank#48 <= 10) OR (currency_rank#49 <= 10)) + +(49) Project [codegen id : 6] +Output [5]: [catalog AS channel#50, item#45, return_ratio#46, return_rank#48, currency_rank#49] +Input [5]: [item#45, return_ratio#46, currency_ratio#47, return_rank#48, currency_rank#49] + +(50) CometScan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_net_profit#55, ss_sold_date_sk#56] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#56)] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(51) CometFilter +Input [6]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_net_profit#55, ss_sold_date_sk#56] +Condition : (((((((isnotnull(ss_net_profit#55) AND isnotnull(ss_net_paid#54)) AND isnotnull(ss_quantity#53)) AND (ss_net_profit#55 > 1.00)) AND (ss_net_paid#54 > 0.00)) AND (ss_quantity#53 > 0)) AND isnotnull(ss_ticket_number#52)) AND isnotnull(ss_item_sk#51)) + +(52) CometProject +Input [6]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_net_profit#55, ss_sold_date_sk#56] +Arguments: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56], [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] + +(53) CometBroadcastExchange +Input [5]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] +Arguments: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] + +(54) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60, sr_returned_date_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(55) CometFilter +Input [5]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60, sr_returned_date_sk#61] +Condition : (((isnotnull(sr_return_amt#60) AND (sr_return_amt#60 > 10000.00)) AND isnotnull(sr_ticket_number#58)) AND isnotnull(sr_item_sk#57)) + +(56) CometProject +Input [5]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60, sr_returned_date_sk#61] +Arguments: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60], [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60] + +(57) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56] +Right output [4]: [sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60] +Arguments: [ss_ticket_number#52, ss_item_sk#51], [sr_ticket_number#58, sr_item_sk#57], Inner, BuildLeft + +(58) CometProject +Input [9]: [ss_item_sk#51, ss_ticket_number#52, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_item_sk#57, sr_ticket_number#58, sr_return_quantity#59, sr_return_amt#60] +Arguments: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60], [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60] + +(59) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#62] + +(60) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60] +Right output [1]: [d_date_sk#62] +Arguments: [ss_sold_date_sk#56], [d_date_sk#62], Inner, BuildRight + +(61) CometProject +Input [7]: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, ss_sold_date_sk#56, sr_return_quantity#59, sr_return_amt#60, d_date_sk#62] +Arguments: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, sr_return_quantity#59, sr_return_amt#60], [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, sr_return_quantity#59, sr_return_amt#60] + +(62) CometHashAggregate +Input [5]: [ss_item_sk#51, ss_quantity#53, ss_net_paid#54, sr_return_quantity#59, sr_return_amt#60] +Keys [1]: [ss_item_sk#51] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#59, 0)), partial_sum(coalesce(ss_quantity#53, 0)), partial_sum(coalesce(cast(sr_return_amt#60 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#54 as decimal(12,2)), 0.00))] + +(63) CometExchange +Input [7]: [ss_item_sk#51, sum#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Arguments: hashpartitioning(ss_item_sk#51, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(64) CometHashAggregate +Input [7]: [ss_item_sk#51, sum#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [ss_item_sk#51] +Functions [4]: [sum(coalesce(sr_return_quantity#59, 0)), sum(coalesce(ss_quantity#53, 0)), sum(coalesce(cast(sr_return_amt#60 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#54 as decimal(12,2)), 0.00))] + +(65) CometExchange +Input [3]: [item#69, return_ratio#70, currency_ratio#71] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(66) CometSort +Input [3]: [item#69, return_ratio#70, currency_ratio#71] +Arguments: [item#69, return_ratio#70, currency_ratio#71], [return_ratio#70 ASC NULLS FIRST] + +(67) ColumnarToRow [codegen id : 7] +Input [3]: [item#69, return_ratio#70, currency_ratio#71] + +(68) Window +Input [3]: [item#69, return_ratio#70, currency_ratio#71] +Arguments: [rank(return_ratio#70) windowspecdefinition(return_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#72], [return_ratio#70 ASC NULLS FIRST] + +(69) Sort [codegen id : 8] +Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72] +Arguments: [currency_ratio#71 ASC NULLS FIRST], false, 0 + +(70) Window +Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72] +Arguments: [rank(currency_ratio#71) windowspecdefinition(currency_ratio#71 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#73], [currency_ratio#71 ASC NULLS FIRST] + +(71) Filter [codegen id : 9] +Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72, currency_rank#73] +Condition : ((return_rank#72 <= 10) OR (currency_rank#73 <= 10)) + +(72) Project [codegen id : 9] +Output [5]: [store AS channel#74, item#69, return_ratio#70, return_rank#72, currency_rank#73] +Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#72, currency_rank#73] + +(73) Union + +(74) HashAggregate [codegen id : 10] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(75) Exchange +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: hashpartitioning(channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(76) HashAggregate [codegen id : 11] +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Keys [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + +(77) TakeOrderedAndProject +Input [5]: [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] +Arguments: 100, [channel#26 ASC NULLS FIRST, return_rank#24 ASC NULLS FIRST, currency_rank#25 ASC NULLS FIRST, item#21 ASC NULLS FIRST], [channel#26, item#21, return_ratio#22, return_rank#24, currency_rank#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..af903c445b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q49.native_iceberg_compat/simplified.txt @@ -0,0 +1,99 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (11) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (10) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (3) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (2) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #2 + CometHashAggregate [item,return_ratio,currency_ratio,ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + CometExchange [ws_item_sk] #3 + CometHashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt,d_date_sk] + CometProject [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometBroadcastExchange [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] #4 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (6) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (5) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #6 + CometHashAggregate [item,return_ratio,currency_ratio,cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(cr_return_quantity, 0)),sum(coalesce(cs_quantity, 0)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + CometExchange [cs_item_sk] #7 + CometHashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] + CometProject [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + CometBroadcastHashJoin [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount,d_date_sk] + CometProject [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometBroadcastExchange [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] #8 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (9) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (8) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometSort [item,return_ratio,currency_ratio] + CometExchange #9 + CometHashAggregate [item,return_ratio,currency_ratio,ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sum(coalesce(sr_return_quantity, 0)),sum(coalesce(ss_quantity, 0)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + CometExchange [ss_item_sk] #10 + CometHashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] + CometProject [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt,d_date_sk] + CometProject [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometBroadcastExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] #11 + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/explain.txt new file mode 100644 index 0000000000..7044110296 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/explain.txt @@ -0,0 +1,259 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Filter (47) + +- * HashAggregate (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * SortMergeJoin FullOuter (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * Project (16) + : : : +- Window (15) + : : : +- * ColumnarToRow (14) + : : : +- CometSort (13) + : : : +- CometExchange (12) + : : : +- CometHashAggregate (11) + : : : +- CometExchange (10) + : : : +- CometHashAggregate (9) + : : : +- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- Window (20) + : : +- * ColumnarToRow (19) + : : +- CometSort (18) + : : +- ReusedExchange (17) + : +- * Sort (31) + : +- ReusedExchange (30) + +- BroadcastExchange (42) + +- * Project (41) + +- Window (40) + +- * Sort (39) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5, d_month_seq#6] + +(4) CometFilter +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5], [d_date_sk#4, d_date#5] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: [d_date_sk#4, d_date#5] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Right output [2]: [d_date_sk#4, d_date#5] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] +Arguments: [ws_item_sk#1, ws_sales_price#2, d_date#5], [ws_item_sk#1, ws_sales_price#2, d_date#5] + +(9) CometHashAggregate +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] + +(10) CometExchange +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(11) CometHashAggregate +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] + +(12) CometExchange +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(13) CometSort +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] +Arguments: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1], [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST] + +(14) ColumnarToRow [codegen id : 1] +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] + +(15) Window +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] +Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#10], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(16) Project [codegen id : 4] +Output [4]: [item_sk#8, d_date#5, sumws#9, rk#10] +Input [5]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1, rk#10] + +(17) ReusedExchange [Reuses operator id: 12] +Output [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] + +(18) CometSort +Input [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] +Arguments: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12], [ws_item_sk#12 ASC NULLS FIRST, d_date#11 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 2] +Input [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] + +(20) Window +Input [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] +Arguments: [row_number() windowspecdefinition(ws_item_sk#12, d_date#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#13], [ws_item_sk#12], [d_date#11 ASC NULLS FIRST] + +(21) Project [codegen id : 3] +Output [3]: [item_sk#8 AS item_sk#14, sumws#9 AS sumws#15, rk#13] +Input [5]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12, rk#13] + +(22) BroadcastExchange +Input [3]: [item_sk#14, sumws#15, rk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [item_sk#8] +Right keys [1]: [item_sk#14] +Join type: Inner +Join condition: (rk#10 >= rk#13) + +(24) Project [codegen id : 4] +Output [4]: [item_sk#8, d_date#5, sumws#9, sumws#15] +Input [7]: [item_sk#8, d_date#5, sumws#9, rk#10, item_sk#14, sumws#15, rk#13] + +(25) HashAggregate [codegen id : 4] +Input [4]: [item_sk#8, d_date#5, sumws#9, sumws#15] +Keys [3]: [item_sk#8, d_date#5, sumws#9] +Functions [1]: [partial_sum(sumws#15)] +Aggregate Attributes [2]: [sum#16, isEmpty#17] +Results [5]: [item_sk#8, d_date#5, sumws#9, sum#18, isEmpty#19] + +(26) Exchange +Input [5]: [item_sk#8, d_date#5, sumws#9, sum#18, isEmpty#19] +Arguments: hashpartitioning(item_sk#8, d_date#5, sumws#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 5] +Input [5]: [item_sk#8, d_date#5, sumws#9, sum#18, isEmpty#19] +Keys [3]: [item_sk#8, d_date#5, sumws#9] +Functions [1]: [sum(sumws#15)] +Aggregate Attributes [1]: [sum(sumws#15)#20] +Results [3]: [item_sk#8, d_date#5, sum(sumws#15)#20 AS cume_sales#21] + +(28) Exchange +Input [3]: [item_sk#8, d_date#5, cume_sales#21] +Arguments: hashpartitioning(item_sk#8, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 6] +Input [3]: [item_sk#8, d_date#5, cume_sales#21] +Arguments: [item_sk#8 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(30) ReusedExchange [Reuses operator id: 28] +Output [3]: [item_sk#22, d_date#23, cume_sales#24] + +(31) Sort [codegen id : 12] +Input [3]: [item_sk#22, d_date#23, cume_sales#24] +Arguments: [item_sk#22 ASC NULLS FIRST, d_date#23 ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin [codegen id : 13] +Left keys [2]: [item_sk#8, d_date#5] +Right keys [2]: [item_sk#22, d_date#23] +Join type: FullOuter +Join condition: None + +(33) Filter [codegen id : 13] +Input [6]: [item_sk#8, d_date#5, cume_sales#21, item_sk#22, d_date#23, cume_sales#24] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#8) THEN item_sk#8 ELSE item_sk#22 END) + +(34) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#8) THEN item_sk#8 ELSE item_sk#22 END AS item_sk#25, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#23 END AS d_date#26, cume_sales#21 AS web_sales#27, cume_sales#24 AS store_sales#28] +Input [6]: [item_sk#8, d_date#5, cume_sales#21, item_sk#22, d_date#23, cume_sales#24] + +(35) Exchange +Input [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Arguments: hashpartitioning(item_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 14] +Input [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Arguments: [item_sk#25 ASC NULLS FIRST, d_date#26 ASC NULLS FIRST], false, 0 + +(37) Window +Input [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Arguments: [row_number() windowspecdefinition(item_sk#25, d_date#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#29], [item_sk#25], [d_date#26 ASC NULLS FIRST] + +(38) ReusedExchange [Reuses operator id: 35] +Output [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] + +(39) Sort [codegen id : 28] +Input [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Arguments: [item_sk#25 ASC NULLS FIRST, d_date#26 ASC NULLS FIRST], false, 0 + +(40) Window +Input [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Arguments: [row_number() windowspecdefinition(item_sk#25, d_date#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#30], [item_sk#25], [d_date#26 ASC NULLS FIRST] + +(41) Project [codegen id : 29] +Output [4]: [item_sk#25 AS item_sk#31, web_sales#27 AS web_sales#32, store_sales#28 AS store_sales#33, rk#30] +Input [5]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, rk#30] + +(42) BroadcastExchange +Input [4]: [item_sk#31, web_sales#32, store_sales#33, rk#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] + +(43) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [item_sk#25] +Right keys [1]: [item_sk#31] +Join type: Inner +Join condition: (rk#29 >= rk#30) + +(44) Project [codegen id : 30] +Output [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, web_sales#32, store_sales#33] +Input [9]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, rk#29, item_sk#31, web_sales#32, store_sales#33, rk#30] + +(45) HashAggregate [codegen id : 30] +Input [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, web_sales#32, store_sales#33] +Keys [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Functions [2]: [partial_max(web_sales#32), partial_max(store_sales#33)] +Aggregate Attributes [2]: [max#34, max#35] +Results [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, max#36, max#37] + +(46) HashAggregate [codegen id : 30] +Input [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, max#36, max#37] +Keys [4]: [item_sk#25, d_date#26, web_sales#27, store_sales#28] +Functions [2]: [max(web_sales#32), max(store_sales#33)] +Aggregate Attributes [2]: [max(web_sales#32)#38, max(store_sales#33)#39] +Results [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, max(web_sales#32)#38 AS web_cumulative#40, max(store_sales#33)#39 AS store_cumulative#41] + +(47) Filter [codegen id : 30] +Input [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, web_cumulative#40, store_cumulative#41] +Condition : ((isnotnull(web_cumulative#40) AND isnotnull(store_cumulative#41)) AND (web_cumulative#40 > store_cumulative#41)) + +(48) TakeOrderedAndProject +Input [6]: [item_sk#25, d_date#26, web_sales#27, store_sales#28, web_cumulative#40, store_cumulative#41] +Arguments: 100, [item_sk#25 ASC NULLS FIRST, d_date#26 ASC NULLS FIRST], [item_sk#25, d_date#26, web_sales#27, store_sales#28, web_cumulative#40, store_cumulative#41] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..055420a2a4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_datafusion/simplified.txt @@ -0,0 +1,75 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (30) + Filter [web_cumulative,store_cumulative] + HashAggregate [item_sk,d_date,web_sales,store_sales,max,max] [max(web_sales),max(store_sales),web_cumulative,store_cumulative,max,max] + HashAggregate [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] [max,max,max,max] + Project [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (14) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (13) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + Filter [item_sk,item_sk] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (5) + HashAggregate [item_sk,d_date,sumws,sum,isEmpty] [sum(sumws),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumws] #3 + WholeStageCodegen (4) + HashAggregate [item_sk,d_date,sumws,sumws] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumws,sumws] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,sumws,ws_item_sk] + CometExchange [ws_item_sk] #4 + CometHashAggregate [item_sk,d_date,sumws,ws_item_sk,sum,sum(UnscaledValue(ws_sales_price))] + CometExchange [ws_item_sk,d_date] #5 + CometHashAggregate [ws_item_sk,d_date,sum,ws_sales_price] + CometProject [ws_item_sk,ws_sales_price,d_date] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #6 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [item_sk,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,sumws,ws_item_sk] + ReusedExchange [item_sk,d_date,sumws,ws_item_sk] #4 + InputAdapter + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,cume_sales] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (29) + Project [item_sk,web_sales,store_sales,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (28) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,web_sales,store_sales] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b01d15546a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/explain.txt @@ -0,0 +1,398 @@ +== Physical Plan == +TakeOrderedAndProject (72) ++- * Filter (71) + +- * HashAggregate (70) + +- * HashAggregate (69) + +- * Project (68) + +- * BroadcastHashJoin Inner BuildRight (67) + :- Window (61) + : +- * Sort (60) + : +- Exchange (59) + : +- * Project (58) + : +- * Filter (57) + : +- * SortMergeJoin FullOuter (56) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * Project (16) + : : : +- Window (15) + : : : +- * ColumnarToRow (14) + : : : +- CometSort (13) + : : : +- CometExchange (12) + : : : +- CometHashAggregate (11) + : : : +- CometExchange (10) + : : : +- CometHashAggregate (9) + : : : +- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- Window (20) + : : +- * ColumnarToRow (19) + : : +- CometSort (18) + : : +- ReusedExchange (17) + : +- * Sort (55) + : +- Exchange (54) + : +- * HashAggregate (53) + : +- Exchange (52) + : +- * HashAggregate (51) + : +- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Project (42) + : : +- Window (41) + : : +- * ColumnarToRow (40) + : : +- CometSort (39) + : : +- CometExchange (38) + : : +- CometHashAggregate (37) + : : +- CometExchange (36) + : : +- CometHashAggregate (35) + : : +- CometProject (34) + : : +- CometBroadcastHashJoin (33) + : : :- CometFilter (31) + : : : +- CometScan parquet spark_catalog.default.store_sales (30) + : : +- ReusedExchange (32) + : +- BroadcastExchange (48) + : +- * Project (47) + : +- Window (46) + : +- * ColumnarToRow (45) + : +- CometSort (44) + : +- ReusedExchange (43) + +- BroadcastExchange (66) + +- * Project (65) + +- Window (64) + +- * Sort (63) + +- ReusedExchange (62) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Arguments: [d_date_sk#4, d_date#5], [d_date_sk#4, d_date#5] + +(6) CometBroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: [d_date_sk#4, d_date#5] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Right output [2]: [d_date_sk#4, d_date#5] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] +Arguments: [ws_item_sk#1, ws_sales_price#2, d_date#5], [ws_item_sk#1, ws_sales_price#2, d_date#5] + +(9) CometHashAggregate +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] + +(10) CometExchange +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(11) CometHashAggregate +Input [3]: [ws_item_sk#1, d_date#5, sum#7] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] + +(12) CometExchange +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(13) CometSort +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] +Arguments: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1], [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST] + +(14) ColumnarToRow [codegen id : 1] +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] + +(15) Window +Input [4]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1] +Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#10], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(16) Project [codegen id : 4] +Output [4]: [item_sk#8, d_date#5, sumws#9, rk#10] +Input [5]: [item_sk#8, d_date#5, sumws#9, ws_item_sk#1, rk#10] + +(17) ReusedExchange [Reuses operator id: 12] +Output [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] + +(18) CometSort +Input [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] +Arguments: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12], [ws_item_sk#12 ASC NULLS FIRST, d_date#11 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 2] +Input [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] + +(20) Window +Input [4]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12] +Arguments: [row_number() windowspecdefinition(ws_item_sk#12, d_date#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#13], [ws_item_sk#12], [d_date#11 ASC NULLS FIRST] + +(21) Project [codegen id : 3] +Output [3]: [item_sk#8 AS item_sk#14, sumws#9 AS sumws#15, rk#13] +Input [5]: [item_sk#8, d_date#11, sumws#9, ws_item_sk#12, rk#13] + +(22) BroadcastExchange +Input [3]: [item_sk#14, sumws#15, rk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [item_sk#8] +Right keys [1]: [item_sk#14] +Join type: Inner +Join condition: (rk#10 >= rk#13) + +(24) Project [codegen id : 4] +Output [4]: [item_sk#8, d_date#5, sumws#9, sumws#15] +Input [7]: [item_sk#8, d_date#5, sumws#9, rk#10, item_sk#14, sumws#15, rk#13] + +(25) HashAggregate [codegen id : 4] +Input [4]: [item_sk#8, d_date#5, sumws#9, sumws#15] +Keys [3]: [item_sk#8, d_date#5, sumws#9] +Functions [1]: [partial_sum(sumws#15)] +Aggregate Attributes [2]: [sum#16, isEmpty#17] +Results [5]: [item_sk#8, d_date#5, sumws#9, sum#18, isEmpty#19] + +(26) Exchange +Input [5]: [item_sk#8, d_date#5, sumws#9, sum#18, isEmpty#19] +Arguments: hashpartitioning(item_sk#8, d_date#5, sumws#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 5] +Input [5]: [item_sk#8, d_date#5, sumws#9, sum#18, isEmpty#19] +Keys [3]: [item_sk#8, d_date#5, sumws#9] +Functions [1]: [sum(sumws#15)] +Aggregate Attributes [1]: [sum(sumws#15)#20] +Results [3]: [item_sk#8, d_date#5, sum(sumws#15)#20 AS cume_sales#21] + +(28) Exchange +Input [3]: [item_sk#8, d_date#5, cume_sales#21] +Arguments: hashpartitioning(item_sk#8, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 6] +Input [3]: [item_sk#8, d_date#5, cume_sales#21] +Arguments: [item_sk#8 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(30) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#24)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(31) CometFilter +Input [3]: [ss_item_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Condition : isnotnull(ss_item_sk#22) + +(32) ReusedExchange [Reuses operator id: 6] +Output [2]: [d_date_sk#25, d_date#26] + +(33) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Right output [2]: [d_date_sk#25, d_date#26] +Arguments: [ss_sold_date_sk#24], [d_date_sk#25], Inner, BuildRight + +(34) CometProject +Input [5]: [ss_item_sk#22, ss_sales_price#23, ss_sold_date_sk#24, d_date_sk#25, d_date#26] +Arguments: [ss_item_sk#22, ss_sales_price#23, d_date#26], [ss_item_sk#22, ss_sales_price#23, d_date#26] + +(35) CometHashAggregate +Input [3]: [ss_item_sk#22, ss_sales_price#23, d_date#26] +Keys [2]: [ss_item_sk#22, d_date#26] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#23))] + +(36) CometExchange +Input [3]: [ss_item_sk#22, d_date#26, sum#27] +Arguments: hashpartitioning(ss_item_sk#22, d_date#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(37) CometHashAggregate +Input [3]: [ss_item_sk#22, d_date#26, sum#27] +Keys [2]: [ss_item_sk#22, d_date#26] +Functions [1]: [sum(UnscaledValue(ss_sales_price#23))] + +(38) CometExchange +Input [4]: [item_sk#28, d_date#26, sumss#29, ss_item_sk#22] +Arguments: hashpartitioning(ss_item_sk#22, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(39) CometSort +Input [4]: [item_sk#28, d_date#26, sumss#29, ss_item_sk#22] +Arguments: [item_sk#28, d_date#26, sumss#29, ss_item_sk#22], [ss_item_sk#22 ASC NULLS FIRST, d_date#26 ASC NULLS FIRST] + +(40) ColumnarToRow [codegen id : 7] +Input [4]: [item_sk#28, d_date#26, sumss#29, ss_item_sk#22] + +(41) Window +Input [4]: [item_sk#28, d_date#26, sumss#29, ss_item_sk#22] +Arguments: [row_number() windowspecdefinition(ss_item_sk#22, d_date#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#30], [ss_item_sk#22], [d_date#26 ASC NULLS FIRST] + +(42) Project [codegen id : 10] +Output [4]: [item_sk#28, d_date#26, sumss#29, rk#30] +Input [5]: [item_sk#28, d_date#26, sumss#29, ss_item_sk#22, rk#30] + +(43) ReusedExchange [Reuses operator id: 38] +Output [4]: [item_sk#28, d_date#31, sumss#29, ss_item_sk#32] + +(44) CometSort +Input [4]: [item_sk#28, d_date#31, sumss#29, ss_item_sk#32] +Arguments: [item_sk#28, d_date#31, sumss#29, ss_item_sk#32], [ss_item_sk#32 ASC NULLS FIRST, d_date#31 ASC NULLS FIRST] + +(45) ColumnarToRow [codegen id : 8] +Input [4]: [item_sk#28, d_date#31, sumss#29, ss_item_sk#32] + +(46) Window +Input [4]: [item_sk#28, d_date#31, sumss#29, ss_item_sk#32] +Arguments: [row_number() windowspecdefinition(ss_item_sk#32, d_date#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#33], [ss_item_sk#32], [d_date#31 ASC NULLS FIRST] + +(47) Project [codegen id : 9] +Output [3]: [item_sk#28 AS item_sk#34, sumss#29 AS sumss#35, rk#33] +Input [5]: [item_sk#28, d_date#31, sumss#29, ss_item_sk#32, rk#33] + +(48) BroadcastExchange +Input [3]: [item_sk#34, sumss#35, rk#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(49) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#28] +Right keys [1]: [item_sk#34] +Join type: Inner +Join condition: (rk#30 >= rk#33) + +(50) Project [codegen id : 10] +Output [4]: [item_sk#28, d_date#26, sumss#29, sumss#35] +Input [7]: [item_sk#28, d_date#26, sumss#29, rk#30, item_sk#34, sumss#35, rk#33] + +(51) HashAggregate [codegen id : 10] +Input [4]: [item_sk#28, d_date#26, sumss#29, sumss#35] +Keys [3]: [item_sk#28, d_date#26, sumss#29] +Functions [1]: [partial_sum(sumss#35)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [5]: [item_sk#28, d_date#26, sumss#29, sum#38, isEmpty#39] + +(52) Exchange +Input [5]: [item_sk#28, d_date#26, sumss#29, sum#38, isEmpty#39] +Arguments: hashpartitioning(item_sk#28, d_date#26, sumss#29, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(53) HashAggregate [codegen id : 11] +Input [5]: [item_sk#28, d_date#26, sumss#29, sum#38, isEmpty#39] +Keys [3]: [item_sk#28, d_date#26, sumss#29] +Functions [1]: [sum(sumss#35)] +Aggregate Attributes [1]: [sum(sumss#35)#40] +Results [3]: [item_sk#28, d_date#26, sum(sumss#35)#40 AS cume_sales#41] + +(54) Exchange +Input [3]: [item_sk#28, d_date#26, cume_sales#41] +Arguments: hashpartitioning(item_sk#28, d_date#26, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(55) Sort [codegen id : 12] +Input [3]: [item_sk#28, d_date#26, cume_sales#41] +Arguments: [item_sk#28 ASC NULLS FIRST, d_date#26 ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin [codegen id : 13] +Left keys [2]: [item_sk#8, d_date#5] +Right keys [2]: [item_sk#28, d_date#26] +Join type: FullOuter +Join condition: None + +(57) Filter [codegen id : 13] +Input [6]: [item_sk#8, d_date#5, cume_sales#21, item_sk#28, d_date#26, cume_sales#41] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#8) THEN item_sk#8 ELSE item_sk#28 END) + +(58) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#8) THEN item_sk#8 ELSE item_sk#28 END AS item_sk#42, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#26 END AS d_date#43, cume_sales#21 AS web_sales#44, cume_sales#41 AS store_sales#45] +Input [6]: [item_sk#8, d_date#5, cume_sales#21, item_sk#28, d_date#26, cume_sales#41] + +(59) Exchange +Input [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Arguments: hashpartitioning(item_sk#42, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(60) Sort [codegen id : 14] +Input [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Arguments: [item_sk#42 ASC NULLS FIRST, d_date#43 ASC NULLS FIRST], false, 0 + +(61) Window +Input [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Arguments: [row_number() windowspecdefinition(item_sk#42, d_date#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#46], [item_sk#42], [d_date#43 ASC NULLS FIRST] + +(62) ReusedExchange [Reuses operator id: 59] +Output [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] + +(63) Sort [codegen id : 28] +Input [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Arguments: [item_sk#42 ASC NULLS FIRST, d_date#43 ASC NULLS FIRST], false, 0 + +(64) Window +Input [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Arguments: [row_number() windowspecdefinition(item_sk#42, d_date#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#47], [item_sk#42], [d_date#43 ASC NULLS FIRST] + +(65) Project [codegen id : 29] +Output [4]: [item_sk#42 AS item_sk#48, web_sales#44 AS web_sales#49, store_sales#45 AS store_sales#50, rk#47] +Input [5]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, rk#47] + +(66) BroadcastExchange +Input [4]: [item_sk#48, web_sales#49, store_sales#50, rk#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(67) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [item_sk#42] +Right keys [1]: [item_sk#48] +Join type: Inner +Join condition: (rk#46 >= rk#47) + +(68) Project [codegen id : 30] +Output [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, web_sales#49, store_sales#50] +Input [9]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, rk#46, item_sk#48, web_sales#49, store_sales#50, rk#47] + +(69) HashAggregate [codegen id : 30] +Input [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, web_sales#49, store_sales#50] +Keys [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Functions [2]: [partial_max(web_sales#49), partial_max(store_sales#50)] +Aggregate Attributes [2]: [max#51, max#52] +Results [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, max#53, max#54] + +(70) HashAggregate [codegen id : 30] +Input [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, max#53, max#54] +Keys [4]: [item_sk#42, d_date#43, web_sales#44, store_sales#45] +Functions [2]: [max(web_sales#49), max(store_sales#50)] +Aggregate Attributes [2]: [max(web_sales#49)#55, max(store_sales#50)#56] +Results [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, max(web_sales#49)#55 AS web_cumulative#57, max(store_sales#50)#56 AS store_cumulative#58] + +(71) Filter [codegen id : 30] +Input [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, web_cumulative#57, store_cumulative#58] +Condition : ((isnotnull(web_cumulative#57) AND isnotnull(store_cumulative#58)) AND (web_cumulative#57 > store_cumulative#58)) + +(72) TakeOrderedAndProject +Input [6]: [item_sk#42, d_date#43, web_sales#44, store_sales#45, web_cumulative#57, store_cumulative#58] +Arguments: 100, [item_sk#42 ASC NULLS FIRST, d_date#43 ASC NULLS FIRST], [item_sk#42, d_date#43, web_sales#44, store_sales#45, web_cumulative#57, store_cumulative#58] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8a7f0aa1ed --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q51a.native_iceberg_compat/simplified.txt @@ -0,0 +1,110 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (30) + Filter [web_cumulative,store_cumulative] + HashAggregate [item_sk,d_date,web_sales,store_sales,max,max] [max(web_sales),max(store_sales),web_cumulative,store_cumulative,max,max] + HashAggregate [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] [max,max,max,max] + Project [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (14) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (13) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + Filter [item_sk,item_sk] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (6) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (5) + HashAggregate [item_sk,d_date,sumws,sum,isEmpty] [sum(sumws),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumws] #3 + WholeStageCodegen (4) + HashAggregate [item_sk,d_date,sumws,sumws] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumws,sumws] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,sumws,ws_item_sk] + CometExchange [ws_item_sk] #4 + CometHashAggregate [item_sk,d_date,sumws,ws_item_sk,sum,sum(UnscaledValue(ws_sales_price))] + CometExchange [ws_item_sk,d_date] #5 + CometHashAggregate [ws_item_sk,d_date,sum,ws_sales_price] + CometProject [ws_item_sk,ws_sales_price,d_date] + CometBroadcastHashJoin [ws_item_sk,ws_sales_price,ws_sold_date_sk,d_date_sk,d_date] + CometFilter [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_date] #6 + CometProject [d_date_sk,d_date] + CometFilter [d_date_sk,d_date,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Project [item_sk,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,sumws,ws_item_sk] + ReusedExchange [item_sk,d_date,sumws,ws_item_sk] #4 + InputAdapter + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #8 + WholeStageCodegen (11) + HashAggregate [item_sk,d_date,sumss,sum,isEmpty] [sum(sumss),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumss] #9 + WholeStageCodegen (10) + HashAggregate [item_sk,d_date,sumss,sumss] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumss,sumss] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (7) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,sumss,ss_item_sk] + CometExchange [ss_item_sk] #10 + CometHashAggregate [item_sk,d_date,sumss,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] + CometExchange [ss_item_sk,d_date] #11 + CometHashAggregate [ss_item_sk,d_date,sum,ss_sales_price] + CometProject [ss_item_sk,ss_sales_price,d_date] + CometBroadcastHashJoin [ss_item_sk,ss_sales_price,ss_sold_date_sk,d_date_sk,d_date] + CometFilter [ss_item_sk,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + ReusedExchange [d_date_sk,d_date] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (9) + Project [item_sk,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (8) + ColumnarToRow + InputAdapter + CometSort [item_sk,d_date,sumss,ss_item_sk] + ReusedExchange [item_sk,d_date,sumss,ss_item_sk] #10 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (29) + Project [item_sk,web_sales,store_sales,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (28) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,web_sales,store_sales] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/explain.txt new file mode 100644 index 0000000000..674d4c6f7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`call_center` (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Arguments: [i_item_sk#1, i_brand#2, i_category#3] + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(4) CometFilter +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(5) CometBroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_item_sk#1], [cs_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [cs_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] + +(13) CometNativeScan: `spark_catalog`.`default`.`call_center` +Output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(14) CometFilter +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(15) CometBroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Right output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cs_call_center_sk#4], [cc_call_center_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] +Arguments: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12], [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] + +(18) CometHashAggregate +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] + +(19) CometExchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] + +(21) CometExchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] + +(24) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Arguments: [avg(_w0#15) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#17], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(27) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] +Condition : ((isnotnull(avg_monthly_sales#17) AND (avg_monthly_sales#17 > 0.000000)) AND CASE WHEN (avg_monthly_sales#17 > 0.000000) THEN ((abs((sum_sales#14 - avg_monthly_sales#17)) / avg_monthly_sales#17) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] + +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] + +(30) CometHashAggregate +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] +Keys [5]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22] +Functions [1]: [sum(UnscaledValue(cs_sales_price#24))] + +(31) CometExchange +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: hashpartitioning(i_category#18, i_brand#19, cc_name#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14], [i_category#18 ASC NULLS FIRST, i_brand#19 ASC NULLS FIRST, cc_name#20 ASC NULLS FIRST, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] + +(34) Window +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [rank(d_year#21, d_moy#22) windowspecdefinition(i_category#18, i_brand#19, cc_name#20, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#18, i_brand#19, cc_name#20], [d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#14 AS sum_sales#26, rn#25] +Input [7]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14, rn#25] + +(36) BroadcastExchange +Input [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#18, i_brand#19, cc_name#20, (rn#25 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] + +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(40) CometSort +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14], [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(42) Window +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#32], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#14 AS sum_sales#33, rn#32] +Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14, rn#32] + +(44) BroadcastExchange +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#32 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, sum_sales#26 AS psum#34, sum_sales#33 AS nsum#35] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26, i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] + +(47) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] +Arguments: 100, [(sum_sales#14 - avg_monthly_sales#17) ASC NULLS FIRST, d_year#9 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b840a4a743 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_datafusion/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_moy,psum,nsum] + WholeStageCodegen (7) + Project [i_category,i_brand,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,cc_name] #1 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(cs_sales_price))] + CometExchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum,cs_sales_price] + CometProject [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy,cc_call_center_sk,cc_name] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_category] + CometBroadcastExchange [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] #3 + CometFilter [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year,d_moy] + CometBroadcastExchange [cc_call_center_sk,cc_name] #5 + CometFilter [cc_call_center_sk,cc_name] + CometNativeScan: `spark_catalog`.`default`.`call_center` [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,cc_name] #7 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8fc6256fe1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/explain.txt @@ -0,0 +1,256 @@ +== Physical Plan == +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (28) + : : +- * Filter (27) + : : +- Window (26) + : : +- * Filter (25) + : : +- Window (24) + : : +- * ColumnarToRow (23) + : : +- CometSort (22) + : : +- CometExchange (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.item (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.call_center (13) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) + + +(1) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(3) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#7)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(5) CometBroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Right output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_item_sk#1], [cs_item_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [d_date_sk#8, d_year#9, d_moy#10] + +(11) CometBroadcastHashJoin +Left output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Right output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: [cs_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] +Arguments: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10], [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] + +(13) CometScan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#11, cc_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(15) CometBroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cc_call_center_sk#11, cc_name#12] + +(16) CometBroadcastHashJoin +Left output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Right output [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: [cs_call_center_sk#4], [cc_call_center_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] +Arguments: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12], [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] + +(18) CometHashAggregate +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] + +(19) CometExchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(20) CometHashAggregate +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#13] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] + +(21) CometExchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(22) CometSort +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15], [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(23) ColumnarToRow [codegen id : 1] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] + +(24) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(25) Filter [codegen id : 2] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(26) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16] +Arguments: [avg(_w0#15) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#17], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(27) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] +Condition : ((isnotnull(avg_monthly_sales#17) AND (avg_monthly_sales#17 > 0.000000)) AND CASE WHEN (avg_monthly_sales#17 > 0.000000) THEN ((abs((sum_sales#14 - avg_monthly_sales#17)) / avg_monthly_sales#17) > 0.1000000000000000) END) + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, _w0#15, rn#16, avg_monthly_sales#17] + +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] + +(30) CometHashAggregate +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum#23] +Keys [5]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22] +Functions [1]: [sum(UnscaledValue(cs_sales_price#24))] + +(31) CometExchange +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: hashpartitioning(i_category#18, i_brand#19, cc_name#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14], [i_category#18 ASC NULLS FIRST, i_brand#19 ASC NULLS FIRST, cc_name#20 ASC NULLS FIRST, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(33) ColumnarToRow [codegen id : 3] +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] + +(34) Window +Input [6]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14] +Arguments: [rank(d_year#21, d_moy#22) windowspecdefinition(i_category#18, i_brand#19, cc_name#20, d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#18, i_brand#19, cc_name#20], [d_year#21 ASC NULLS FIRST, d_moy#22 ASC NULLS FIRST] + +(35) Project [codegen id : 4] +Output [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#14 AS sum_sales#26, rn#25] +Input [7]: [i_category#18, i_brand#19, cc_name#20, d_year#21, d_moy#22, sum_sales#14, rn#25] + +(36) BroadcastExchange +Input [5]: [i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#18, i_brand#19, cc_name#20, (rn#25 + 1)] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, i_category#18, i_brand#19, cc_name#20, sum_sales#26, rn#25] + +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(40) CometSort +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14], [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] + +(42) Window +Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14] +Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#32], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] + +(43) Project [codegen id : 6] +Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#14 AS sum_sales#33, rn#32] +Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#14, rn#32] + +(44) BroadcastExchange +Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] + +(45) BroadcastHashJoin [codegen id : 7] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#16] +Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#32 - 1)] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, sum_sales#26 AS psum#34, sum_sales#33 AS nsum#35] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#14, avg_monthly_sales#17, rn#16, sum_sales#26, i_category#27, i_brand#28, cc_name#29, sum_sales#33, rn#32] + +(47) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] +Arguments: 100, [(sum_sales#14 - avg_monthly_sales#17) ASC NULLS FIRST, d_year#9 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#17, sum_sales#14, psum#34, nsum#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..3d8e8b5417 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57.native_iceberg_compat/simplified.txt @@ -0,0 +1,63 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_moy,psum,nsum] + WholeStageCodegen (7) + Project [i_category,i_brand,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (2) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0] + CometExchange [i_category,i_brand,cc_name] #1 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,_w0,sum,sum(UnscaledValue(cs_sales_price))] + CometExchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum,cs_sales_price] + CometProject [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy,cc_call_center_sk,cc_name] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + CometBroadcastHashJoin [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year,d_moy] + CometProject [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastHashJoin [i_item_sk,i_brand,i_category,cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometFilter [i_item_sk,i_brand,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + CometBroadcastExchange [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] #3 + CometFilter [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy] #4 + CometFilter [d_date_sk,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + CometBroadcastExchange [cc_call_center_sk,cc_name] #5 + CometFilter [cc_call_center_sk,cc_name] + CometScan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + CometExchange [i_category,i_brand,cc_name] #7 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (5) + ColumnarToRow + InputAdapter + CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/explain.txt new file mode 100644 index 0000000000..498f1c3292 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (65) ++- * HashAggregate (64) + +- Exchange (63) + +- * HashAggregate (62) + +- Union (61) + :- * HashAggregate (50) + : +- Exchange (49) + : +- * HashAggregate (48) + : +- Union (47) + : :- * HashAggregate (22) + : : +- * ColumnarToRow (21) + : : +- CometExchange (20) + : : +- CometHashAggregate (19) + : : +- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometUnion (7) + : : : : :- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (4) + : : : +- CometBroadcastExchange (11) + : : : +- CometProject (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (16) + : : +- CometFilter (15) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (14) + : :- * HashAggregate (25) + : : +- * ColumnarToRow (24) + : : +- ReusedExchange (23) + : +- * HashAggregate (46) + : +- * ColumnarToRow (45) + : +- CometExchange (44) + : +- CometHashAggregate (43) + : +- CometProject (42) + : +- CometBroadcastHashJoin (41) + : :- CometProject (39) + : : +- CometBroadcastHashJoin (38) + : : :- CometUnion (36) + : : : :- CometProject (28) + : : : : +- CometFilter (27) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (26) + : : : +- CometProject (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometBroadcastExchange (30) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`web_returns` (29) + : : : +- CometProject (33) + : : : +- CometFilter (32) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (31) + : : +- ReusedExchange (37) + : +- ReusedExchange (40) + :- * HashAggregate (55) + : +- Exchange (54) + : +- * HashAggregate (53) + : +- * HashAggregate (52) + : +- ReusedExchange (51) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * HashAggregate (57) + +- ReusedExchange (56) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10], [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(5) CometFilter +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(6) CometProject +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20], [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] + +(7) CometUnion +Child 0 Input [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Child 1 Input [6]: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21, d_date#22] + +(9) CometFilter +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 1998-08-04)) AND (d_date#22 <= 1998-08-18)) AND isnotnull(d_date_sk#21)) + +(10) CometProject +Input [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(12) CometBroadcastHashJoin +Left output [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [1]: [d_date_sk#21] +Arguments: [date_sk#6], [d_date_sk#21], Inner, BuildRight + +(13) CometProject +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +Arguments: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10], [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] + +(14) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(15) CometFilter +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(16) CometBroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(17) CometBroadcastHashJoin +Left output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [store_sk#5], [s_store_sk#23], Inner, BuildRight + +(18) CometProject +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] +Arguments: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24], [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] + +(19) CometHashAggregate +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] + +(20) CometExchange +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] + +(22) HashAggregate [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#29, sum(UnscaledValue(return_amt#9))#30, sum(UnscaledValue(profit#8))#31, sum(UnscaledValue(net_loss#10))#32] +Results [5]: [store channel AS channel#33, concat(store, s_store_id#24) AS id#34, MakeDecimal(sum(UnscaledValue(sales_price#7))#29,17,2) AS sales#35, MakeDecimal(sum(UnscaledValue(return_amt#9))#30,17,2) AS returns#36, (MakeDecimal(sum(UnscaledValue(profit#8))#31,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#32,17,2)) AS profit#37] + +(23) ReusedExchange [Reuses operator id: 20] +Output [5]: [cp_catalog_page_id#38, sum#39, sum#40, sum#41, sum#42] + +(24) ColumnarToRow [codegen id : 2] +Input [5]: [cp_catalog_page_id#38, sum#39, sum#40, sum#41, sum#42] + +(25) HashAggregate [codegen id : 2] +Input [5]: [cp_catalog_page_id#38, sum#39, sum#40, sum#41, sum#42] +Keys [1]: [cp_catalog_page_id#38] +Functions [4]: [sum(UnscaledValue(sales_price#43)), sum(UnscaledValue(return_amt#44)), sum(UnscaledValue(profit#45)), sum(UnscaledValue(net_loss#46))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#43))#47, sum(UnscaledValue(return_amt#44))#48, sum(UnscaledValue(profit#45))#49, sum(UnscaledValue(net_loss#46))#50] +Results [5]: [catalog channel AS channel#51, concat(catalog_page, cp_catalog_page_id#38) AS id#52, MakeDecimal(sum(UnscaledValue(sales_price#43))#47,17,2) AS sales#53, MakeDecimal(sum(UnscaledValue(return_amt#44))#48,17,2) AS returns#54, (MakeDecimal(sum(UnscaledValue(profit#45))#49,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#46))#50,17,2)) AS profit#55] + +(26) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] +Arguments: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] + +(27) CometFilter +Input [4]: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] +Condition : isnotnull(ws_web_site_sk#56) + +(28) CometProject +Input [4]: [ws_web_site_sk#56, ws_ext_sales_price#57, ws_net_profit#58, ws_sold_date_sk#59] +Arguments: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65], [ws_web_site_sk#56 AS wsr_web_site_sk#60, ws_sold_date_sk#59 AS date_sk#61, ws_ext_sales_price#57 AS sales_price#62, ws_net_profit#58 AS profit#63, 0.00 AS return_amt#64, 0.00 AS net_loss#65] + +(29) CometNativeScan: `spark_catalog`.`default`.`web_returns` +Output [5]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] +Arguments: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] + +(30) CometBroadcastExchange +Input [5]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] +Arguments: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] + +(31) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [4]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] +Arguments: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] + +(32) CometFilter +Input [4]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] +Condition : ((isnotnull(ws_item_sk#71) AND isnotnull(ws_order_number#73)) AND isnotnull(ws_web_site_sk#72)) + +(33) CometProject +Input [4]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73, ws_sold_date_sk#74] +Arguments: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73], [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73] + +(34) CometBroadcastHashJoin +Left output [5]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70] +Right output [3]: [ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73] +Arguments: [wr_item_sk#66, wr_order_number#67], [ws_item_sk#71, ws_order_number#73], Inner, BuildLeft + +(35) CometProject +Input [8]: [wr_item_sk#66, wr_order_number#67, wr_return_amt#68, wr_net_loss#69, wr_returned_date_sk#70, ws_item_sk#71, ws_web_site_sk#72, ws_order_number#73] +Arguments: [wsr_web_site_sk#75, date_sk#76, sales_price#77, profit#78, return_amt#79, net_loss#80], [ws_web_site_sk#72 AS wsr_web_site_sk#75, wr_returned_date_sk#70 AS date_sk#76, 0.00 AS sales_price#77, 0.00 AS profit#78, wr_return_amt#68 AS return_amt#79, wr_net_loss#69 AS net_loss#80] + +(36) CometUnion +Child 0 Input [6]: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65] +Child 1 Input [6]: [wsr_web_site_sk#75, date_sk#76, sales_price#77, profit#78, return_amt#79, net_loss#80] + +(37) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#81] + +(38) CometBroadcastHashJoin +Left output [6]: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65] +Right output [1]: [d_date_sk#81] +Arguments: [date_sk#61], [d_date_sk#81], Inner, BuildRight + +(39) CometProject +Input [7]: [wsr_web_site_sk#60, date_sk#61, sales_price#62, profit#63, return_amt#64, net_loss#65, d_date_sk#81] +Arguments: [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65], [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65] + +(40) ReusedExchange [Reuses operator id: 16] +Output [2]: [web_site_sk#82, web_site_id#83] + +(41) CometBroadcastHashJoin +Left output [5]: [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65] +Right output [2]: [web_site_sk#82, web_site_id#83] +Arguments: [wsr_web_site_sk#60], [web_site_sk#82], Inner, BuildRight + +(42) CometProject +Input [7]: [wsr_web_site_sk#60, sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_sk#82, web_site_id#83] +Arguments: [sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_id#83], [sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_id#83] + +(43) CometHashAggregate +Input [5]: [sales_price#62, profit#63, return_amt#64, net_loss#65, web_site_id#83] +Keys [1]: [web_site_id#83] +Functions [4]: [partial_sum(UnscaledValue(sales_price#62)), partial_sum(UnscaledValue(return_amt#64)), partial_sum(UnscaledValue(profit#63)), partial_sum(UnscaledValue(net_loss#65))] + +(44) CometExchange +Input [5]: [web_site_id#83, sum#84, sum#85, sum#86, sum#87] +Arguments: hashpartitioning(web_site_id#83, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(45) ColumnarToRow [codegen id : 3] +Input [5]: [web_site_id#83, sum#84, sum#85, sum#86, sum#87] + +(46) HashAggregate [codegen id : 3] +Input [5]: [web_site_id#83, sum#84, sum#85, sum#86, sum#87] +Keys [1]: [web_site_id#83] +Functions [4]: [sum(UnscaledValue(sales_price#62)), sum(UnscaledValue(return_amt#64)), sum(UnscaledValue(profit#63)), sum(UnscaledValue(net_loss#65))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#62))#88, sum(UnscaledValue(return_amt#64))#89, sum(UnscaledValue(profit#63))#90, sum(UnscaledValue(net_loss#65))#91] +Results [5]: [web channel AS channel#92, concat(web_site, web_site_id#83) AS id#93, MakeDecimal(sum(UnscaledValue(sales_price#62))#88,17,2) AS sales#94, MakeDecimal(sum(UnscaledValue(return_amt#64))#89,17,2) AS returns#95, (MakeDecimal(sum(UnscaledValue(profit#63))#90,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#65))#91,17,2)) AS profit#96] + +(47) Union + +(48) HashAggregate [codegen id : 4] +Input [5]: [channel#33, id#34, sales#35, returns#36, profit#37] +Keys [2]: [channel#33, id#34] +Functions [3]: [partial_sum(sales#35), partial_sum(returns#36), partial_sum(profit#37)] +Aggregate Attributes [6]: [sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Results [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] + +(49) Exchange +Input [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Arguments: hashpartitioning(channel#33, id#34, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(50) HashAggregate [codegen id : 5] +Input [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [2]: [channel#33, id#34] +Functions [3]: [sum(sales#35), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#35)#109, sum(returns#36)#110, sum(profit#37)#111] +Results [5]: [channel#33, id#34, cast(sum(sales#35)#109 as decimal(37,2)) AS sales#112, cast(sum(returns#36)#110 as decimal(37,2)) AS returns#113, cast(sum(profit#37)#111 as decimal(38,2)) AS profit#114] + +(51) ReusedExchange [Reuses operator id: 49] +Output [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] + +(52) HashAggregate [codegen id : 10] +Input [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [2]: [channel#33, id#34] +Functions [3]: [sum(sales#35), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#35)#109, sum(returns#36)#110, sum(profit#37)#111] +Results [4]: [channel#33, sum(sales#35)#109 AS sales#115, sum(returns#36)#110 AS returns#116, sum(profit#37)#111 AS profit#117] + +(53) HashAggregate [codegen id : 10] +Input [4]: [channel#33, sales#115, returns#116, profit#117] +Keys [1]: [channel#33] +Functions [3]: [partial_sum(sales#115), partial_sum(returns#116), partial_sum(profit#117)] +Aggregate Attributes [6]: [sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Results [7]: [channel#33, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] + +(54) Exchange +Input [7]: [channel#33, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Arguments: hashpartitioning(channel#33, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(55) HashAggregate [codegen id : 11] +Input [7]: [channel#33, sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Keys [1]: [channel#33] +Functions [3]: [sum(sales#115), sum(returns#116), sum(profit#117)] +Aggregate Attributes [3]: [sum(sales#115)#130, sum(returns#116)#131, sum(profit#117)#132] +Results [5]: [channel#33, null AS id#133, sum(sales#115)#130 AS sum(sales)#134, sum(returns#116)#131 AS sum(returns)#135, sum(profit#117)#132 AS sum(profit)#136] + +(56) ReusedExchange [Reuses operator id: 49] +Output [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] + +(57) HashAggregate [codegen id : 16] +Input [8]: [channel#33, id#34, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [2]: [channel#33, id#34] +Functions [3]: [sum(sales#35), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#35)#109, sum(returns#36)#110, sum(profit#37)#111] +Results [3]: [sum(sales#35)#109 AS sales#137, sum(returns#36)#110 AS returns#138, sum(profit#37)#111 AS profit#139] + +(58) HashAggregate [codegen id : 16] +Input [3]: [sales#137, returns#138, profit#139] +Keys: [] +Functions [3]: [partial_sum(sales#137), partial_sum(returns#138), partial_sum(profit#139)] +Aggregate Attributes [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Results [6]: [sum#146, isEmpty#147, sum#148, isEmpty#149, sum#150, isEmpty#151] + +(59) Exchange +Input [6]: [sum#146, isEmpty#147, sum#148, isEmpty#149, sum#150, isEmpty#151] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(60) HashAggregate [codegen id : 17] +Input [6]: [sum#146, isEmpty#147, sum#148, isEmpty#149, sum#150, isEmpty#151] +Keys: [] +Functions [3]: [sum(sales#137), sum(returns#138), sum(profit#139)] +Aggregate Attributes [3]: [sum(sales#137)#152, sum(returns#138)#153, sum(profit#139)#154] +Results [5]: [null AS channel#155, null AS id#156, sum(sales#137)#152 AS sum(sales)#157, sum(returns#138)#153 AS sum(returns)#158, sum(profit#139)#154 AS sum(profit)#159] + +(61) Union + +(62) HashAggregate [codegen id : 18] +Input [5]: [channel#33, id#34, sales#112, returns#113, profit#114] +Keys [5]: [channel#33, id#34, sales#112, returns#113, profit#114] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#33, id#34, sales#112, returns#113, profit#114] + +(63) Exchange +Input [5]: [channel#33, id#34, sales#112, returns#113, profit#114] +Arguments: hashpartitioning(channel#33, id#34, sales#112, returns#113, profit#114, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(64) HashAggregate [codegen id : 19] +Input [5]: [channel#33, id#34, sales#112, returns#113, profit#114] +Keys [5]: [channel#33, id#34, sales#112, returns#113, profit#114] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#33, id#34, sales#112, returns#113, profit#114] + +(65) TakeOrderedAndProject +Input [5]: [channel#33, id#34, sales#112, returns#113, profit#114] +Arguments: 100, [channel#33 ASC NULLS FIRST, id#34 ASC NULLS FIRST], [channel#33, id#34, sales#112, returns#113, profit#114] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..fd65d68eea --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_datafusion/simplified.txt @@ -0,0 +1,87 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (19) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (18) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (4) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #3 + CometHashAggregate [s_store_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,s_store_id] + CometBroadcastHashJoin [store_sk,sales_price,profit,return_amt,net_loss,s_store_sk,s_store_id] + CometProject [store_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [store_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometProject [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #5 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + ReusedExchange [cp_catalog_page_id,sum,sum,sum,sum] #3 + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [web_site_id] #6 + CometHashAggregate [web_site_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,web_site_id] + CometBroadcastHashJoin [wsr_web_site_sk,sales_price,profit,return_amt,net_loss,web_site_sk,web_site_id] + CometProject [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk,ws_item_sk,ws_web_site_sk,ws_order_number] + CometBroadcastExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] #7 + CometNativeScan: `spark_catalog`.`default`.`web_returns` [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_order_number] + CometFilter [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + ReusedExchange [d_date_sk] #4 + ReusedExchange [web_site_sk,web_site_id] #5 + WholeStageCodegen (11) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #8 + WholeStageCodegen (10) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #9 + WholeStageCodegen (16) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..ac3a35a19e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/explain.txt @@ -0,0 +1,494 @@ +== Physical Plan == +TakeOrderedAndProject (83) ++- * HashAggregate (82) + +- Exchange (81) + +- * HashAggregate (80) + +- Union (79) + :- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- Union (65) + : :- * HashAggregate (22) + : : +- * ColumnarToRow (21) + : : +- CometExchange (20) + : : +- CometHashAggregate (19) + : : +- CometProject (18) + : : +- CometBroadcastHashJoin (17) + : : :- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometUnion (7) + : : : : :- CometProject (3) + : : : : : +- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometProject (6) + : : : : +- CometFilter (5) + : : : : +- CometScan parquet spark_catalog.default.store_returns (4) + : : : +- CometBroadcastExchange (11) + : : : +- CometProject (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (16) + : : +- CometFilter (15) + : : +- CometScan parquet spark_catalog.default.store (14) + : :- * HashAggregate (41) + : : +- * ColumnarToRow (40) + : : +- CometExchange (39) + : : +- CometHashAggregate (38) + : : +- CometProject (37) + : : +- CometBroadcastHashJoin (36) + : : :- CometProject (32) + : : : +- CometBroadcastHashJoin (31) + : : : :- CometUnion (29) + : : : : :- CometProject (25) + : : : : : +- CometFilter (24) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (23) + : : : : +- CometProject (28) + : : : : +- CometFilter (27) + : : : : +- CometScan parquet spark_catalog.default.catalog_returns (26) + : : : +- ReusedExchange (30) + : : +- CometBroadcastExchange (35) + : : +- CometFilter (34) + : : +- CometScan parquet spark_catalog.default.catalog_page (33) + : +- * HashAggregate (64) + : +- * ColumnarToRow (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometProject (60) + : +- CometBroadcastHashJoin (59) + : :- CometProject (55) + : : +- CometBroadcastHashJoin (54) + : : :- CometUnion (52) + : : : :- CometProject (44) + : : : : +- CometFilter (43) + : : : : +- CometScan parquet spark_catalog.default.web_sales (42) + : : : +- CometProject (51) + : : : +- CometBroadcastHashJoin (50) + : : : :- CometBroadcastExchange (46) + : : : : +- CometScan parquet spark_catalog.default.web_returns (45) + : : : +- CometProject (49) + : : : +- CometFilter (48) + : : : +- CometScan parquet spark_catalog.default.web_sales (47) + : : +- ReusedExchange (53) + : +- CometBroadcastExchange (58) + : +- CometFilter (57) + : +- CometScan parquet spark_catalog.default.web_site (56) + :- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * HashAggregate (70) + : +- ReusedExchange (69) + +- * HashAggregate (78) + +- Exchange (77) + +- * HashAggregate (76) + +- * HashAggregate (75) + +- ReusedExchange (74) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10], [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] + +(4) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(5) CometFilter +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(6) CometProject +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Arguments: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20], [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] + +(7) CometUnion +Child 0 Input [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Child 1 Input [6]: [store_sk#15, date_sk#16, sales_price#17, profit#18, return_amt#19, net_loss#20] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 1998-08-04)) AND (d_date#22 <= 1998-08-18)) AND isnotnull(d_date_sk#21)) + +(10) CometProject +Input [2]: [d_date_sk#21, d_date#22] +Arguments: [d_date_sk#21], [d_date_sk#21] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: [d_date_sk#21] + +(12) CometBroadcastHashJoin +Left output [6]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [1]: [d_date_sk#21] +Arguments: [date_sk#6], [d_date_sk#21], Inner, BuildRight + +(13) CometProject +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +Arguments: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10], [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] + +(14) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(15) CometFilter +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(16) CometBroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: [s_store_sk#23, s_store_id#24] + +(17) CometBroadcastHashJoin +Left output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Right output [2]: [s_store_sk#23, s_store_id#24] +Arguments: [store_sk#5], [s_store_sk#23], Inner, BuildRight + +(18) CometProject +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] +Arguments: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24], [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] + +(19) CometHashAggregate +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] + +(20) CometExchange +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) ColumnarToRow [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] + +(22) HashAggregate [codegen id : 1] +Input [5]: [s_store_id#24, sum#25, sum#26, sum#27, sum#28] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#29, sum(UnscaledValue(return_amt#9))#30, sum(UnscaledValue(profit#8))#31, sum(UnscaledValue(net_loss#10))#32] +Results [5]: [store channel AS channel#33, concat(store, s_store_id#24) AS id#34, MakeDecimal(sum(UnscaledValue(sales_price#7))#29,17,2) AS sales#35, MakeDecimal(sum(UnscaledValue(return_amt#9))#30,17,2) AS returns#36, (MakeDecimal(sum(UnscaledValue(profit#8))#31,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#32,17,2)) AS profit#37] + +(23) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_catalog_page_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#41)] +PushedFilters: [IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(24) CometFilter +Input [4]: [cs_catalog_page_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Condition : isnotnull(cs_catalog_page_sk#38) + +(25) CometProject +Input [4]: [cs_catalog_page_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Arguments: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47], [cs_catalog_page_sk#38 AS page_sk#42, cs_sold_date_sk#41 AS date_sk#43, cs_ext_sales_price#39 AS sales_price#44, cs_net_profit#40 AS profit#45, 0.00 AS return_amt#46, 0.00 AS net_loss#47] + +(26) CometScan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_catalog_page_sk#48, cr_return_amount#49, cr_net_loss#50, cr_returned_date_sk#51] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#51)] +PushedFilters: [IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(27) CometFilter +Input [4]: [cr_catalog_page_sk#48, cr_return_amount#49, cr_net_loss#50, cr_returned_date_sk#51] +Condition : isnotnull(cr_catalog_page_sk#48) + +(28) CometProject +Input [4]: [cr_catalog_page_sk#48, cr_return_amount#49, cr_net_loss#50, cr_returned_date_sk#51] +Arguments: [page_sk#52, date_sk#53, sales_price#54, profit#55, return_amt#56, net_loss#57], [cr_catalog_page_sk#48 AS page_sk#52, cr_returned_date_sk#51 AS date_sk#53, 0.00 AS sales_price#54, 0.00 AS profit#55, cr_return_amount#49 AS return_amt#56, cr_net_loss#50 AS net_loss#57] + +(29) CometUnion +Child 0 Input [6]: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47] +Child 1 Input [6]: [page_sk#52, date_sk#53, sales_price#54, profit#55, return_amt#56, net_loss#57] + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#58] + +(31) CometBroadcastHashJoin +Left output [6]: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47] +Right output [1]: [d_date_sk#58] +Arguments: [date_sk#43], [d_date_sk#58], Inner, BuildRight + +(32) CometProject +Input [7]: [page_sk#42, date_sk#43, sales_price#44, profit#45, return_amt#46, net_loss#47, d_date_sk#58] +Arguments: [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47], [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47] + +(33) CometScan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(34) CometFilter +Input [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Condition : isnotnull(cp_catalog_page_sk#59) + +(35) CometBroadcastExchange +Input [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Arguments: [cp_catalog_page_sk#59, cp_catalog_page_id#60] + +(36) CometBroadcastHashJoin +Left output [5]: [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47] +Right output [2]: [cp_catalog_page_sk#59, cp_catalog_page_id#60] +Arguments: [page_sk#42], [cp_catalog_page_sk#59], Inner, BuildRight + +(37) CometProject +Input [7]: [page_sk#42, sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_sk#59, cp_catalog_page_id#60] +Arguments: [sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_id#60], [sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_id#60] + +(38) CometHashAggregate +Input [5]: [sales_price#44, profit#45, return_amt#46, net_loss#47, cp_catalog_page_id#60] +Keys [1]: [cp_catalog_page_id#60] +Functions [4]: [partial_sum(UnscaledValue(sales_price#44)), partial_sum(UnscaledValue(return_amt#46)), partial_sum(UnscaledValue(profit#45)), partial_sum(UnscaledValue(net_loss#47))] + +(39) CometExchange +Input [5]: [cp_catalog_page_id#60, sum#61, sum#62, sum#63, sum#64] +Arguments: hashpartitioning(cp_catalog_page_id#60, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(40) ColumnarToRow [codegen id : 2] +Input [5]: [cp_catalog_page_id#60, sum#61, sum#62, sum#63, sum#64] + +(41) HashAggregate [codegen id : 2] +Input [5]: [cp_catalog_page_id#60, sum#61, sum#62, sum#63, sum#64] +Keys [1]: [cp_catalog_page_id#60] +Functions [4]: [sum(UnscaledValue(sales_price#44)), sum(UnscaledValue(return_amt#46)), sum(UnscaledValue(profit#45)), sum(UnscaledValue(net_loss#47))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#44))#65, sum(UnscaledValue(return_amt#46))#66, sum(UnscaledValue(profit#45))#67, sum(UnscaledValue(net_loss#47))#68] +Results [5]: [catalog channel AS channel#69, concat(catalog_page, cp_catalog_page_id#60) AS id#70, MakeDecimal(sum(UnscaledValue(sales_price#44))#65,17,2) AS sales#71, MakeDecimal(sum(UnscaledValue(return_amt#46))#66,17,2) AS returns#72, (MakeDecimal(sum(UnscaledValue(profit#45))#67,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#47))#68,17,2)) AS profit#73] + +(42) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_site_sk#74, ws_ext_sales_price#75, ws_net_profit#76, ws_sold_date_sk#77] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#77)] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(43) CometFilter +Input [4]: [ws_web_site_sk#74, ws_ext_sales_price#75, ws_net_profit#76, ws_sold_date_sk#77] +Condition : isnotnull(ws_web_site_sk#74) + +(44) CometProject +Input [4]: [ws_web_site_sk#74, ws_ext_sales_price#75, ws_net_profit#76, ws_sold_date_sk#77] +Arguments: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83], [ws_web_site_sk#74 AS wsr_web_site_sk#78, ws_sold_date_sk#77 AS date_sk#79, ws_ext_sales_price#75 AS sales_price#80, ws_net_profit#76 AS profit#81, 0.00 AS return_amt#82, 0.00 AS net_loss#83] + +(45) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#88)] +ReadSchema: struct + +(46) CometBroadcastExchange +Input [5]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Arguments: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] + +(47) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91, ws_sold_date_sk#92] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) CometFilter +Input [4]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91, ws_sold_date_sk#92] +Condition : ((isnotnull(ws_item_sk#89) AND isnotnull(ws_order_number#91)) AND isnotnull(ws_web_site_sk#90)) + +(49) CometProject +Input [4]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91, ws_sold_date_sk#92] +Arguments: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91], [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91] + +(50) CometBroadcastHashJoin +Left output [5]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Right output [3]: [ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91] +Arguments: [wr_item_sk#84, wr_order_number#85], [ws_item_sk#89, ws_order_number#91], Inner, BuildLeft + +(51) CometProject +Input [8]: [wr_item_sk#84, wr_order_number#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88, ws_item_sk#89, ws_web_site_sk#90, ws_order_number#91] +Arguments: [wsr_web_site_sk#93, date_sk#94, sales_price#95, profit#96, return_amt#97, net_loss#98], [ws_web_site_sk#90 AS wsr_web_site_sk#93, wr_returned_date_sk#88 AS date_sk#94, 0.00 AS sales_price#95, 0.00 AS profit#96, wr_return_amt#86 AS return_amt#97, wr_net_loss#87 AS net_loss#98] + +(52) CometUnion +Child 0 Input [6]: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83] +Child 1 Input [6]: [wsr_web_site_sk#93, date_sk#94, sales_price#95, profit#96, return_amt#97, net_loss#98] + +(53) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#99] + +(54) CometBroadcastHashJoin +Left output [6]: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83] +Right output [1]: [d_date_sk#99] +Arguments: [date_sk#79], [d_date_sk#99], Inner, BuildRight + +(55) CometProject +Input [7]: [wsr_web_site_sk#78, date_sk#79, sales_price#80, profit#81, return_amt#82, net_loss#83, d_date_sk#99] +Arguments: [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83], [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83] + +(56) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#100, web_site_id#101] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(57) CometFilter +Input [2]: [web_site_sk#100, web_site_id#101] +Condition : isnotnull(web_site_sk#100) + +(58) CometBroadcastExchange +Input [2]: [web_site_sk#100, web_site_id#101] +Arguments: [web_site_sk#100, web_site_id#101] + +(59) CometBroadcastHashJoin +Left output [5]: [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83] +Right output [2]: [web_site_sk#100, web_site_id#101] +Arguments: [wsr_web_site_sk#78], [web_site_sk#100], Inner, BuildRight + +(60) CometProject +Input [7]: [wsr_web_site_sk#78, sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_sk#100, web_site_id#101] +Arguments: [sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_id#101], [sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_id#101] + +(61) CometHashAggregate +Input [5]: [sales_price#80, profit#81, return_amt#82, net_loss#83, web_site_id#101] +Keys [1]: [web_site_id#101] +Functions [4]: [partial_sum(UnscaledValue(sales_price#80)), partial_sum(UnscaledValue(return_amt#82)), partial_sum(UnscaledValue(profit#81)), partial_sum(UnscaledValue(net_loss#83))] + +(62) CometExchange +Input [5]: [web_site_id#101, sum#102, sum#103, sum#104, sum#105] +Arguments: hashpartitioning(web_site_id#101, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(63) ColumnarToRow [codegen id : 3] +Input [5]: [web_site_id#101, sum#102, sum#103, sum#104, sum#105] + +(64) HashAggregate [codegen id : 3] +Input [5]: [web_site_id#101, sum#102, sum#103, sum#104, sum#105] +Keys [1]: [web_site_id#101] +Functions [4]: [sum(UnscaledValue(sales_price#80)), sum(UnscaledValue(return_amt#82)), sum(UnscaledValue(profit#81)), sum(UnscaledValue(net_loss#83))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#80))#106, sum(UnscaledValue(return_amt#82))#107, sum(UnscaledValue(profit#81))#108, sum(UnscaledValue(net_loss#83))#109] +Results [5]: [web channel AS channel#110, concat(web_site, web_site_id#101) AS id#111, MakeDecimal(sum(UnscaledValue(sales_price#80))#106,17,2) AS sales#112, MakeDecimal(sum(UnscaledValue(return_amt#82))#107,17,2) AS returns#113, (MakeDecimal(sum(UnscaledValue(profit#81))#108,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#83))#109,17,2)) AS profit#114] + +(65) Union + +(66) HashAggregate [codegen id : 4] +Input [5]: [channel#33, id#34, sales#35, returns#36, profit#37] +Keys [2]: [channel#33, id#34] +Functions [3]: [partial_sum(sales#35), partial_sum(returns#36), partial_sum(profit#37)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(67) Exchange +Input [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#33, id#34, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(68) HashAggregate [codegen id : 5] +Input [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [2]: [channel#33, id#34] +Functions [3]: [sum(sales#35), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#35)#127, sum(returns#36)#128, sum(profit#37)#129] +Results [5]: [channel#33, id#34, cast(sum(sales#35)#127 as decimal(37,2)) AS sales#130, cast(sum(returns#36)#128 as decimal(37,2)) AS returns#131, cast(sum(profit#37)#129 as decimal(38,2)) AS profit#132] + +(69) ReusedExchange [Reuses operator id: 67] +Output [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(70) HashAggregate [codegen id : 10] +Input [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [2]: [channel#33, id#34] +Functions [3]: [sum(sales#35), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#35)#127, sum(returns#36)#128, sum(profit#37)#129] +Results [4]: [channel#33, sum(sales#35)#127 AS sales#133, sum(returns#36)#128 AS returns#134, sum(profit#37)#129 AS profit#135] + +(71) HashAggregate [codegen id : 10] +Input [4]: [channel#33, sales#133, returns#134, profit#135] +Keys [1]: [channel#33] +Functions [3]: [partial_sum(sales#133), partial_sum(returns#134), partial_sum(profit#135)] +Aggregate Attributes [6]: [sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Results [7]: [channel#33, sum#142, isEmpty#143, sum#144, isEmpty#145, sum#146, isEmpty#147] + +(72) Exchange +Input [7]: [channel#33, sum#142, isEmpty#143, sum#144, isEmpty#145, sum#146, isEmpty#147] +Arguments: hashpartitioning(channel#33, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(73) HashAggregate [codegen id : 11] +Input [7]: [channel#33, sum#142, isEmpty#143, sum#144, isEmpty#145, sum#146, isEmpty#147] +Keys [1]: [channel#33] +Functions [3]: [sum(sales#133), sum(returns#134), sum(profit#135)] +Aggregate Attributes [3]: [sum(sales#133)#148, sum(returns#134)#149, sum(profit#135)#150] +Results [5]: [channel#33, null AS id#151, sum(sales#133)#148 AS sum(sales)#152, sum(returns#134)#149 AS sum(returns)#153, sum(profit#135)#150 AS sum(profit)#154] + +(74) ReusedExchange [Reuses operator id: 67] +Output [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(75) HashAggregate [codegen id : 16] +Input [8]: [channel#33, id#34, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [2]: [channel#33, id#34] +Functions [3]: [sum(sales#35), sum(returns#36), sum(profit#37)] +Aggregate Attributes [3]: [sum(sales#35)#127, sum(returns#36)#128, sum(profit#37)#129] +Results [3]: [sum(sales#35)#127 AS sales#155, sum(returns#36)#128 AS returns#156, sum(profit#37)#129 AS profit#157] + +(76) HashAggregate [codegen id : 16] +Input [3]: [sales#155, returns#156, profit#157] +Keys: [] +Functions [3]: [partial_sum(sales#155), partial_sum(returns#156), partial_sum(profit#157)] +Aggregate Attributes [6]: [sum#158, isEmpty#159, sum#160, isEmpty#161, sum#162, isEmpty#163] +Results [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] + +(77) Exchange +Input [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(78) HashAggregate [codegen id : 17] +Input [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +Keys: [] +Functions [3]: [sum(sales#155), sum(returns#156), sum(profit#157)] +Aggregate Attributes [3]: [sum(sales#155)#170, sum(returns#156)#171, sum(profit#157)#172] +Results [5]: [null AS channel#173, null AS id#174, sum(sales#155)#170 AS sum(sales)#175, sum(returns#156)#171 AS sum(returns)#176, sum(profit#157)#172 AS sum(profit)#177] + +(79) Union + +(80) HashAggregate [codegen id : 18] +Input [5]: [channel#33, id#34, sales#130, returns#131, profit#132] +Keys [5]: [channel#33, id#34, sales#130, returns#131, profit#132] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#33, id#34, sales#130, returns#131, profit#132] + +(81) Exchange +Input [5]: [channel#33, id#34, sales#130, returns#131, profit#132] +Arguments: hashpartitioning(channel#33, id#34, sales#130, returns#131, profit#132, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(82) HashAggregate [codegen id : 19] +Input [5]: [channel#33, id#34, sales#130, returns#131, profit#132] +Keys [5]: [channel#33, id#34, sales#130, returns#131, profit#132] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#33, id#34, sales#130, returns#131, profit#132] + +(83) TakeOrderedAndProject +Input [5]: [channel#33, id#34, sales#130, returns#131, profit#132] +Arguments: 100, [channel#33 ASC NULLS FIRST, id#34 ASC NULLS FIRST], [channel#33, id#34, sales#130, returns#131, profit#132] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..11f824ac1a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q5a.native_iceberg_compat/simplified.txt @@ -0,0 +1,105 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (19) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (18) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (4) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #3 + CometHashAggregate [s_store_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,s_store_id] + CometBroadcastHashJoin [store_sk,sales_price,profit,return_amt,net_loss,s_store_sk,s_store_id] + CometProject [store_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [store_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometProject [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] [store_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #5 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [cp_catalog_page_id] #6 + CometHashAggregate [cp_catalog_page_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [page_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [page_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [page_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [page_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] [page_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometProject [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] [page_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #4 + CometBroadcastExchange [cp_catalog_page_sk,cp_catalog_page_id] #7 + CometFilter [cp_catalog_page_sk,cp_catalog_page_id] + CometScan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + ColumnarToRow + InputAdapter + CometExchange [web_site_id] #8 + CometHashAggregate [web_site_id,sum,sum,sum,sum,sales_price,return_amt,profit,net_loss] + CometProject [sales_price,profit,return_amt,net_loss,web_site_id] + CometBroadcastHashJoin [wsr_web_site_sk,sales_price,profit,return_amt,net_loss,web_site_sk,web_site_id] + CometProject [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss,d_date_sk] + CometUnion [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometProject [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometFilter [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometProject [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] [wsr_web_site_sk,date_sk,sales_price,profit,return_amt,net_loss] + CometBroadcastHashJoin [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk,ws_item_sk,ws_web_site_sk,ws_order_number] + CometBroadcastExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] #9 + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_order_number] + CometFilter [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometBroadcastExchange [web_site_sk,web_site_id] #10 + CometFilter [web_site_sk,web_site_id] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (11) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #11 + WholeStageCodegen (10) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #12 + WholeStageCodegen (16) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/explain.txt new file mode 100644 index 0000000000..58bf87acf3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/explain.txt @@ -0,0 +1,243 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometFilter (36) + +- CometHashAggregate (35) + +- CometExchange (34) + +- CometHashAggregate (33) + +- CometProject (32) + +- CometBroadcastHashJoin (31) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (20) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (19) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometHashAggregate (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometFilter (22) + +- CometNativeScan: `spark_catalog`.`default`.`item` (21) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [2]: [ca_address_sk#1, ca_state#2] +Arguments: [ca_address_sk#1, ca_state#2] + +(2) CometFilter +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [c_customer_sk#3, c_current_addr_sk#4] + +(4) CometFilter +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [c_customer_sk#3, c_current_addr_sk#4] + +(6) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#1, ca_state#2] +Right output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_address_sk#1], [c_current_addr_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_state#2, c_customer_sk#3], [ca_state#2, c_customer_sk#3] + +(8) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(9) CometFilter +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(10) CometBroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(11) CometBroadcastHashJoin +Left output [2]: [ca_state#2, c_customer_sk#3] +Right output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], Inner, BuildRight + +(12) CometProject +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7], [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#8, d_month_seq#9] +Arguments: [d_date_sk#8, d_month_seq#9] + +(14) CometFilter +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [2]: [d_date_sk#8, d_month_seq#9] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(17) CometBroadcastHashJoin +Left output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ca_state#2, ss_item_sk#5], [ca_state#2, ss_item_sk#5] + +(19) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Arguments: [i_item_sk#12, i_current_price#13, i_category#14] + +(20) CometFilter +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(21) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_current_price#15, i_category#16] +Arguments: [i_current_price#15, i_category#16] + +(22) CometFilter +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(23) CometHashAggregate +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] + +(24) CometExchange +Input [3]: [i_category#16, sum#17, count#18] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [3]: [i_category#16, sum#17, count#18] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] + +(26) CometFilter +Input [2]: [avg(i_current_price)#19, i_category#16] +Condition : isnotnull(avg(i_current_price)#19) + +(27) CometBroadcastExchange +Input [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [avg(i_current_price)#19, i_category#16] + +(28) CometBroadcastHashJoin +Left output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Right output [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [i_category#14], [i_category#16], Inner, (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#19)), BuildRight + +(29) CometProject +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#19, i_category#16] +Arguments: [i_item_sk#12], [i_item_sk#12] + +(30) CometBroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: [i_item_sk#12] + +(31) CometBroadcastHashJoin +Left output [2]: [ca_state#2, ss_item_sk#5] +Right output [1]: [i_item_sk#12] +Arguments: [ss_item_sk#5], [i_item_sk#12], Inner, BuildRight + +(32) CometProject +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] +Arguments: [ca_state#2], [ca_state#2] + +(33) CometHashAggregate +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] + +(34) CometExchange +Input [2]: [ca_state#2, count#20] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(35) CometHashAggregate +Input [2]: [ca_state#2, count#20] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] + +(36) CometFilter +Input [3]: [state#21, cnt#22, ca_state#2] +Condition : (cnt#22 >= 10) + +(37) CometTakeOrderedAndProject +Input [3]: [state#21, cnt#22, ca_state#2] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[cnt#22 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST], output=[state#21,cnt#22]), [state#21, cnt#22], 100, [cnt#22 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#21, cnt#22] + +(38) ColumnarToRow [codegen id : 1] +Input [2]: [state#21, cnt#22] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 14 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* ColumnarToRow (45) ++- CometHashAggregate (44) + +- CometExchange (43) + +- CometHashAggregate (42) + +- CometProject (41) + +- CometFilter (40) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (39) + + +(39) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [3]: [d_month_seq#23, d_year#24, d_moy#25] +Arguments: [d_month_seq#23, d_year#24, d_moy#25] + +(40) CometFilter +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Condition : (((isnotnull(d_year#24) AND isnotnull(d_moy#25)) AND (d_year#24 = 2000)) AND (d_moy#25 = 1)) + +(41) CometProject +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Arguments: [d_month_seq#23], [d_month_seq#23] + +(42) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(43) CometExchange +Input [1]: [d_month_seq#23] +Arguments: hashpartitioning(d_month_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(45) ColumnarToRow [codegen id : 1] +Input [1]: [d_month_seq#23] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/simplified.txt new file mode 100644 index 0000000000..305e08545d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [state,cnt,ca_state] + CometFilter [state,cnt,ca_state] + CometHashAggregate [state,cnt,ca_state,count,count(1)] + CometExchange [ca_state] #1 + CometHashAggregate [ca_state,count] + CometProject [ca_state] + CometBroadcastHashJoin [ca_state,ss_item_sk,i_item_sk] + CometProject [ca_state,ss_item_sk] + CometBroadcastHashJoin [ca_state,ss_item_sk,ss_sold_date_sk,d_date_sk] + CometProject [ca_state,ss_item_sk,ss_sold_date_sk] + CometBroadcastHashJoin [ca_state,c_customer_sk,ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometProject [ca_state,c_customer_sk] + CometBroadcastHashJoin [ca_address_sk,ca_state,c_customer_sk,c_current_addr_sk] + CometFilter [ca_address_sk,ca_state] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_state] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [d_month_seq] + CometExchange [d_month_seq] #5 + CometHashAggregate [d_month_seq] + CometProject [d_month_seq] + CometFilter [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_month_seq,d_year,d_moy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk] #6 + CometProject [i_item_sk] + CometBroadcastHashJoin [i_item_sk,i_current_price,i_category,avg(i_current_price),i_category] + CometFilter [i_item_sk,i_current_price,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_category] + CometBroadcastExchange [avg(i_current_price),i_category] #7 + CometFilter [avg(i_current_price),i_category] + CometHashAggregate [avg(i_current_price),i_category,sum,count,avg(UnscaledValue(i_current_price))] + CometExchange [i_category] #8 + CometHashAggregate [i_category,sum,count,i_current_price] + CometFilter [i_current_price,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d89aade9a3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/explain.txt @@ -0,0 +1,265 @@ +== Physical Plan == +* ColumnarToRow (38) ++- CometTakeOrderedAndProject (37) + +- CometFilter (36) + +- CometHashAggregate (35) + +- CometExchange (34) + +- CometHashAggregate (33) + +- CometProject (32) + +- CometBroadcastHashJoin (31) + :- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (12) + : : +- CometBroadcastHashJoin (11) + : : :- CometProject (7) + : : : +- CometBroadcastHashJoin (6) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.customer_address (1) + : : : +- CometBroadcastExchange (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.customer (3) + : : +- CometBroadcastExchange (10) + : : +- CometFilter (9) + : : +- CometScan parquet spark_catalog.default.store_sales (8) + : +- CometBroadcastExchange (16) + : +- CometProject (15) + : +- CometFilter (14) + : +- CometScan parquet spark_catalog.default.date_dim (13) + +- CometBroadcastExchange (30) + +- CometProject (29) + +- CometBroadcastHashJoin (28) + :- CometFilter (20) + : +- CometScan parquet spark_catalog.default.item (19) + +- CometBroadcastExchange (27) + +- CometFilter (26) + +- CometHashAggregate (25) + +- CometExchange (24) + +- CometHashAggregate (23) + +- CometFilter (22) + +- CometScan parquet spark_catalog.default.item (21) + + +(1) CometScan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) CometFilter +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(3) CometScan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(5) CometBroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [c_customer_sk#3, c_current_addr_sk#4] + +(6) CometBroadcastHashJoin +Left output [2]: [ca_address_sk#1, ca_state#2] +Right output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_address_sk#1], [c_current_addr_sk#4], Inner, BuildRight + +(7) CometProject +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] +Arguments: [ca_state#2, c_customer_sk#3], [ca_state#2, c_customer_sk#3] + +(8) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(9) CometFilter +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(10) CometBroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(11) CometBroadcastHashJoin +Left output [2]: [ca_state#2, c_customer_sk#3] +Right output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#3], [ss_customer_sk#6], Inner, BuildRight + +(12) CometProject +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7], [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_month_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(15) CometProject +Input [2]: [d_date_sk#8, d_month_seq#9] +Arguments: [d_date_sk#8], [d_date_sk#8] + +(16) CometBroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: [d_date_sk#8] + +(17) CometBroadcastHashJoin +Left output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Right output [1]: [d_date_sk#8] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(18) CometProject +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] +Arguments: [ca_state#2, ss_item_sk#5], [ca_state#2, ss_item_sk#5] + +(19) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_category), IsNotNull(i_item_sk)] +ReadSchema: struct + +(20) CometFilter +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(21) CometScan parquet spark_catalog.default.item +Output [2]: [i_current_price#15, i_category#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(22) CometFilter +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(23) CometHashAggregate +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] + +(24) CometExchange +Input [3]: [i_category#16, sum#17, count#18] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [3]: [i_category#16, sum#17, count#18] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] + +(26) CometFilter +Input [2]: [avg(i_current_price)#19, i_category#16] +Condition : isnotnull(avg(i_current_price)#19) + +(27) CometBroadcastExchange +Input [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [avg(i_current_price)#19, i_category#16] + +(28) CometBroadcastHashJoin +Left output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Right output [2]: [avg(i_current_price)#19, i_category#16] +Arguments: [i_category#14], [i_category#16], Inner, (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#19)), BuildRight + +(29) CometProject +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#19, i_category#16] +Arguments: [i_item_sk#12], [i_item_sk#12] + +(30) CometBroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: [i_item_sk#12] + +(31) CometBroadcastHashJoin +Left output [2]: [ca_state#2, ss_item_sk#5] +Right output [1]: [i_item_sk#12] +Arguments: [ss_item_sk#5], [i_item_sk#12], Inner, BuildRight + +(32) CometProject +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] +Arguments: [ca_state#2], [ca_state#2] + +(33) CometHashAggregate +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] + +(34) CometExchange +Input [2]: [ca_state#2, count#20] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(35) CometHashAggregate +Input [2]: [ca_state#2, count#20] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] + +(36) CometFilter +Input [3]: [state#21, cnt#22, ca_state#2] +Condition : (cnt#22 >= 10) + +(37) CometTakeOrderedAndProject +Input [3]: [state#21, cnt#22, ca_state#2] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[cnt#22 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST], output=[state#21,cnt#22]), [state#21, cnt#22], 100, [cnt#22 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#21, cnt#22] + +(38) ColumnarToRow [codegen id : 1] +Input [2]: [state#21, cnt#22] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 14 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* ColumnarToRow (45) ++- CometHashAggregate (44) + +- CometExchange (43) + +- CometHashAggregate (42) + +- CometProject (41) + +- CometFilter (40) + +- CometScan parquet spark_catalog.default.date_dim (39) + + +(39) CometScan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#23, d_year#24, d_moy#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(40) CometFilter +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Condition : (((isnotnull(d_year#24) AND isnotnull(d_moy#25)) AND (d_year#24 = 2000)) AND (d_moy#25 = 1)) + +(41) CometProject +Input [3]: [d_month_seq#23, d_year#24, d_moy#25] +Arguments: [d_month_seq#23], [d_month_seq#23] + +(42) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(43) CometExchange +Input [1]: [d_month_seq#23] +Arguments: hashpartitioning(d_month_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [1]: [d_month_seq#23] +Keys [1]: [d_month_seq#23] +Functions: [] + +(45) ColumnarToRow [codegen id : 1] +Input [1]: [d_month_seq#23] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6d308a7b62 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q6.native_iceberg_compat/simplified.txt @@ -0,0 +1,50 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [state,cnt,ca_state] + CometFilter [state,cnt,ca_state] + CometHashAggregate [state,cnt,ca_state,count,count(1)] + CometExchange [ca_state] #1 + CometHashAggregate [ca_state,count] + CometProject [ca_state] + CometBroadcastHashJoin [ca_state,ss_item_sk,i_item_sk] + CometProject [ca_state,ss_item_sk] + CometBroadcastHashJoin [ca_state,ss_item_sk,ss_sold_date_sk,d_date_sk] + CometProject [ca_state,ss_item_sk,ss_sold_date_sk] + CometBroadcastHashJoin [ca_state,c_customer_sk,ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometProject [ca_state,c_customer_sk] + CometBroadcastHashJoin [ca_address_sk,ca_state,c_customer_sk,c_current_addr_sk] + CometFilter [ca_address_sk,ca_state] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + CometBroadcastExchange [c_customer_sk,c_current_addr_sk] #2 + CometFilter [c_customer_sk,c_current_addr_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_sold_date_sk] #3 + CometFilter [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometHashAggregate [d_month_seq] + CometExchange [d_month_seq] #5 + CometHashAggregate [d_month_seq] + CometProject [d_month_seq] + CometFilter [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk] #6 + CometProject [i_item_sk] + CometBroadcastHashJoin [i_item_sk,i_current_price,i_category,avg(i_current_price),i_category] + CometFilter [i_item_sk,i_current_price,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_category] + CometBroadcastExchange [avg(i_current_price),i_category] #7 + CometFilter [avg(i_current_price),i_category] + CometHashAggregate [avg(i_current_price),i_category,sum,count,avg(UnscaledValue(i_current_price))] + CometExchange [i_category] #8 + CometHashAggregate [i_category,sum,count,i_current_price] + CometFilter [i_current_price,i_category] + CometScan parquet spark_catalog.default.item [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/explain.txt new file mode 100644 index 0000000000..d0b3132fc7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/explain.txt @@ -0,0 +1,837 @@ +== Physical Plan == +* ColumnarToRow (163) ++- CometSort (162) + +- CometColumnarExchange (161) + +- CometProject (160) + +- CometSortMergeJoin (159) + :- CometSort (98) + : +- CometExchange (97) + : +- CometHashAggregate (96) + : +- CometHashAggregate (95) + : +- CometProject (94) + : +- CometBroadcastHashJoin (93) + : :- CometProject (88) + : : +- CometBroadcastHashJoin (87) + : : :- CometProject (85) + : : : +- CometBroadcastHashJoin (84) + : : : :- CometProject (82) + : : : : +- CometBroadcastHashJoin (81) + : : : : :- CometProject (79) + : : : : : +- CometBroadcastHashJoin (78) + : : : : : :- CometProject (74) + : : : : : : +- CometBroadcastHashJoin (73) + : : : : : : :- CometProject (71) + : : : : : : : +- CometBroadcastHashJoin (70) + : : : : : : : :- CometProject (66) + : : : : : : : : +- CometBroadcastHashJoin (65) + : : : : : : : : :- CometProject (61) + : : : : : : : : : +- CometBroadcastHashJoin (60) + : : : : : : : : : :- CometProject (58) + : : : : : : : : : : +- CometBroadcastHashJoin (57) + : : : : : : : : : : :- CometProject (53) + : : : : : : : : : : : +- CometBroadcastHashJoin (52) + : : : : : : : : : : : :- CometProject (50) + : : : : : : : : : : : : +- CometBroadcastHashJoin (49) + : : : : : : : : : : : : :- CometProject (45) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (44) + : : : : : : : : : : : : : :- CometProject (40) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (39) + : : : : : : : : : : : : : : :- CometProject (35) + : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (34) + : : : : : : : : : : : : : : : :- CometProject (30) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (29) + : : : : : : : : : : : : : : : : :- CometSort (10) + : : : : : : : : : : : : : : : : : +- CometExchange (9) + : : : : : : : : : : : : : : : : : +- CometProject (8) + : : : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : : : : : : : : : : : : :- CometBroadcastExchange (3) + : : : : : : : : : : : : : : : : : : +- CometFilter (2) + : : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : : : : : : : : : : : : +- CometProject (6) + : : : : : : : : : : : : : : : : : +- CometFilter (5) + : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (4) + : : : : : : : : : : : : : : : : +- CometSort (28) + : : : : : : : : : : : : : : : : +- CometProject (27) + : : : : : : : : : : : : : : : : +- CometFilter (26) + : : : : : : : : : : : : : : : : +- CometHashAggregate (25) + : : : : : : : : : : : : : : : : +- CometExchange (24) + : : : : : : : : : : : : : : : : +- CometHashAggregate (23) + : : : : : : : : : : : : : : : : +- CometProject (22) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (21) + : : : : : : : : : : : : : : : : :- CometSort (15) + : : : : : : : : : : : : : : : : : +- CometExchange (14) + : : : : : : : : : : : : : : : : : +- CometProject (13) + : : : : : : : : : : : : : : : : : +- CometFilter (12) + : : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (11) + : : : : : : : : : : : : : : : : +- CometSort (20) + : : : : : : : : : : : : : : : : +- CometExchange (19) + : : : : : : : : : : : : : : : : +- CometProject (18) + : : : : : : : : : : : : : : : : +- CometFilter (17) + : : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (16) + : : : : : : : : : : : : : : : +- CometBroadcastExchange (33) + : : : : : : : : : : : : : : : +- CometFilter (32) + : : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (31) + : : : : : : : : : : : : : : +- CometBroadcastExchange (38) + : : : : : : : : : : : : : : +- CometFilter (37) + : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (36) + : : : : : : : : : : : : : +- CometBroadcastExchange (43) + : : : : : : : : : : : : : +- CometFilter (42) + : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (41) + : : : : : : : : : : : : +- CometBroadcastExchange (48) + : : : : : : : : : : : : +- CometFilter (47) + : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (46) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- CometBroadcastExchange (56) + : : : : : : : : : : +- CometFilter (55) + : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (54) + : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : +- CometBroadcastExchange (64) + : : : : : : : : +- CometFilter (63) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (62) + : : : : : : : +- CometBroadcastExchange (69) + : : : : : : : +- CometFilter (68) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (67) + : : : : : : +- ReusedExchange (72) + : : : : : +- CometBroadcastExchange (77) + : : : : : +- CometFilter (76) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_address` (75) + : : : : +- ReusedExchange (80) + : : : +- ReusedExchange (83) + : : +- ReusedExchange (86) + : +- CometBroadcastExchange (92) + : +- CometProject (91) + : +- CometFilter (90) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (89) + +- CometSort (158) + +- CometExchange (157) + +- CometHashAggregate (156) + +- CometHashAggregate (155) + +- CometProject (154) + +- CometBroadcastHashJoin (153) + :- CometProject (151) + : +- CometBroadcastHashJoin (150) + : :- CometProject (148) + : : +- CometBroadcastHashJoin (147) + : : :- CometProject (145) + : : : +- CometBroadcastHashJoin (144) + : : : :- CometProject (142) + : : : : +- CometBroadcastHashJoin (141) + : : : : :- CometProject (139) + : : : : : +- CometBroadcastHashJoin (138) + : : : : : :- CometProject (136) + : : : : : : +- CometBroadcastHashJoin (135) + : : : : : : :- CometProject (133) + : : : : : : : +- CometBroadcastHashJoin (132) + : : : : : : : :- CometProject (130) + : : : : : : : : +- CometBroadcastHashJoin (129) + : : : : : : : : :- CometProject (127) + : : : : : : : : : +- CometBroadcastHashJoin (126) + : : : : : : : : : :- CometProject (124) + : : : : : : : : : : +- CometBroadcastHashJoin (123) + : : : : : : : : : : :- CometProject (121) + : : : : : : : : : : : +- CometBroadcastHashJoin (120) + : : : : : : : : : : : :- CometProject (118) + : : : : : : : : : : : : +- CometBroadcastHashJoin (117) + : : : : : : : : : : : : :- CometProject (115) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (114) + : : : : : : : : : : : : : :- CometProject (112) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (111) + : : : : : : : : : : : : : : :- CometProject (107) + : : : : : : : : : : : : : : : +- CometSortMergeJoin (106) + : : : : : : : : : : : : : : : :- CometSort (100) + : : : : : : : : : : : : : : : : +- ReusedExchange (99) + : : : : : : : : : : : : : : : +- CometSort (105) + : : : : : : : : : : : : : : : +- CometProject (104) + : : : : : : : : : : : : : : : +- CometFilter (103) + : : : : : : : : : : : : : : : +- CometHashAggregate (102) + : : : : : : : : : : : : : : : +- ReusedExchange (101) + : : : : : : : : : : : : : : +- CometBroadcastExchange (110) + : : : : : : : : : : : : : : +- CometFilter (109) + : : : : : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (108) + : : : : : : : : : : : : : +- ReusedExchange (113) + : : : : : : : : : : : : +- ReusedExchange (116) + : : : : : : : : : : : +- ReusedExchange (119) + : : : : : : : : : : +- ReusedExchange (122) + : : : : : : : : : +- ReusedExchange (125) + : : : : : : : : +- ReusedExchange (128) + : : : : : : : +- ReusedExchange (131) + : : : : : : +- ReusedExchange (134) + : : : : : +- ReusedExchange (137) + : : : : +- ReusedExchange (140) + : : : +- ReusedExchange (143) + : : +- ReusedExchange (146) + : +- ReusedExchange (149) + +- ReusedExchange (152) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(2) CometFilter +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(3) CometBroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(4) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(5) CometFilter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(6) CometProject +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14], [sr_item_sk#13, sr_ticket_number#14] + +(7) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_ticket_number#8], [sr_item_sk#13, sr_ticket_number#14], Inner, BuildLeft + +(8) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(9) CometExchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometSort +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1 ASC NULLS FIRST] + +(11) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(12) CometFilter +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) CometProject +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(14) CometExchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(15) CometSort +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(17) CometFilter +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(18) CometProject +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(19) CometExchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Right output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_order_number#17], [cr_item_sk#20, cr_order_number#21], Inner + +(22) CometProject +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(23) CometHashAggregate +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(24) CometExchange +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(25) CometHashAggregate +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(26) CometFilter +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(27) CometProject +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Arguments: [cs_item_sk#16], [cs_item_sk#16] + +(28) CometSort +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16], [cs_item_sk#16 ASC NULLS FIRST] + +(29) CometSortMergeJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [1]: [cs_item_sk#16] +Arguments: [ss_item_sk#1], [cs_item_sk#16], Inner + +(30) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(31) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#31, d_year#32] +Arguments: [d_date_sk#31, d_year#32] + +(32) CometFilter +Input [2]: [d_date_sk#31, d_year#32] +Condition : ((isnotnull(d_year#32) AND (d_year#32 = 1999)) AND isnotnull(d_date_sk#31)) + +(33) CometBroadcastExchange +Input [2]: [d_date_sk#31, d_year#32] +Arguments: [d_date_sk#31, d_year#32] + +(34) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#31, d_year#32] +Arguments: [ss_sold_date_sk#12], [d_date_sk#31], Inner, BuildRight + +(35) CometProject +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#31, d_year#32] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] + +(36) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [s_store_sk#33, s_store_name#34, s_zip#35] + +(37) CometFilter +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Condition : ((isnotnull(s_store_sk#33) AND isnotnull(s_store_name#34)) AND isnotnull(s_zip#35)) + +(38) CometBroadcastExchange +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [s_store_sk#33, s_store_name#34, s_zip#35] + +(39) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] +Right output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_store_sk#6], [s_store_sk#33], Inner, BuildRight + +(40) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] + +(41) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(42) CometFilter +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Condition : (((((isnotnull(c_customer_sk#36) AND isnotnull(c_first_sales_date_sk#41)) AND isnotnull(c_first_shipto_date_sk#40)) AND isnotnull(c_current_cdemo_sk#37)) AND isnotnull(c_current_hdemo_sk#38)) AND isnotnull(c_current_addr_sk#39)) + +(43) CometBroadcastExchange +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(44) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] +Right output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_customer_sk#2], [c_customer_sk#36], Inner, BuildRight + +(45) CometProject +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(46) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#42, d_year#43] +Arguments: [d_date_sk#42, d_year#43] + +(47) CometFilter +Input [2]: [d_date_sk#42, d_year#43] +Condition : isnotnull(d_date_sk#42) + +(48) CometBroadcastExchange +Input [2]: [d_date_sk#42, d_year#43] +Arguments: [d_date_sk#42, d_year#43] + +(49) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Right output [2]: [d_date_sk#42, d_year#43] +Arguments: [c_first_sales_date_sk#41], [d_date_sk#42], Inner, BuildRight + +(50) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41, d_date_sk#42, d_year#43] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#44, d_year#45] + +(52) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] +Right output [2]: [d_date_sk#44, d_year#45] +Arguments: [c_first_shipto_date_sk#40], [d_date_sk#44], Inner, BuildRight + +(53) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43, d_date_sk#44, d_year#45] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(54) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [cd_demo_sk#46, cd_marital_status#47] + +(55) CometFilter +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Condition : (isnotnull(cd_demo_sk#46) AND isnotnull(cd_marital_status#47)) + +(56) CometBroadcastExchange +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [cd_demo_sk#46, cd_marital_status#47] + +(57) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_cdemo_sk#3], [cd_demo_sk#46], Inner, BuildRight + +(58) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#48, cd_marital_status#49] + +(60) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] +Right output [2]: [cd_demo_sk#48, cd_marital_status#49] +Arguments: [c_current_cdemo_sk#37], [cd_demo_sk#48], Inner, NOT (cd_marital_status#47 = cd_marital_status#49), BuildRight + +(61) CometProject +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47, cd_demo_sk#48, cd_marital_status#49] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(62) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [1]: [p_promo_sk#50] +Arguments: [p_promo_sk#50] + +(63) CometFilter +Input [1]: [p_promo_sk#50] +Condition : isnotnull(p_promo_sk#50) + +(64) CometBroadcastExchange +Input [1]: [p_promo_sk#50] +Arguments: [p_promo_sk#50] + +(65) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [1]: [p_promo_sk#50] +Arguments: [ss_promo_sk#7], [p_promo_sk#50], Inner, BuildRight + +(66) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, p_promo_sk#50] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(67) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [hd_demo_sk#51, hd_income_band_sk#52] + +(68) CometFilter +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Condition : (isnotnull(hd_demo_sk#51) AND isnotnull(hd_income_band_sk#52)) + +(69) CometBroadcastExchange +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [hd_demo_sk#51, hd_income_band_sk#52] + +(70) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_hdemo_sk#4], [hd_demo_sk#51], Inner, BuildRight + +(71) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] + +(72) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#53, hd_income_band_sk#54] + +(73) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] +Right output [2]: [hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [c_current_hdemo_sk#38], [hd_demo_sk#53], Inner, BuildRight + +(74) CometProject +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] + +(75) CometNativeScan: `spark_catalog`.`default`.`customer_address` +Output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(76) CometFilter +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Condition : isnotnull(ca_address_sk#55) + +(77) CometBroadcastExchange +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(78) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] +Right output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_addr_sk#5], [ca_address_sk#55], Inner, BuildRight + +(79) CometProject +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(80) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(81) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Right output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [c_current_addr_sk#39], [ca_address_sk#60], Inner, BuildRight + +(82) CometProject +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(83) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#65] + +(84) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#65] +Arguments: [hd_income_band_sk#52], [ib_income_band_sk#65], Inner, BuildRight + +(85) CometProject +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#65] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(86) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#66] + +(87) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#66] +Arguments: [hd_income_band_sk#54], [ib_income_band_sk#66], Inner, BuildRight + +(88) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#66] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(89) CometNativeScan: `spark_catalog`.`default`.`item` +Output [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Arguments: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] + +(90) CometFilter +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Condition : ((((((isnotnull(i_current_price#68) AND i_color#69 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#68 >= 64.00)) AND (i_current_price#68 <= 74.00)) AND (i_current_price#68 >= 65.00)) AND (i_current_price#68 <= 79.00)) AND isnotnull(i_item_sk#67)) + +(91) CometProject +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70], [i_item_sk#67, i_product_name#70] + +(92) CometBroadcastExchange +Input [2]: [i_item_sk#67, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70] + +(93) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [2]: [i_item_sk#67, i_product_name#70] +Arguments: [ss_item_sk#1], [i_item_sk#67], Inner, BuildRight + +(94) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Arguments: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70], [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] + +(95) CometHashAggregate +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] + +(96) CometHashAggregate +Input [19]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45, count#71, sum#72, sum#73, sum#74] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] + +(97) CometExchange +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: hashpartitioning(item_sk#76, store_name#77, store_zip#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(98) CometSort +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91], [item_sk#76 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, store_zip#78 ASC NULLS FIRST] + +(99) ReusedExchange [Reuses operator id: 9] +Output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(100) CometSort +Input [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92 ASC NULLS FIRST] + +(101) ReusedExchange [Reuses operator id: 24] +Output [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] + +(102) CometHashAggregate +Input [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] +Keys [1]: [cs_item_sk#103] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#107)), sum(((cr_refunded_cash#108 + cr_reversed_charge#109) + cr_store_credit#110))] + +(103) CometFilter +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(104) CometProject +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Arguments: [cs_item_sk#103], [cs_item_sk#103] + +(105) CometSort +Input [1]: [cs_item_sk#103] +Arguments: [cs_item_sk#103], [cs_item_sk#103 ASC NULLS FIRST] + +(106) CometSortMergeJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [1]: [cs_item_sk#103] +Arguments: [ss_item_sk#92], [cs_item_sk#103], Inner + +(107) CometProject +Input [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, cs_item_sk#103] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(108) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#111, d_year#112] +Arguments: [d_date_sk#111, d_year#112] + +(109) CometFilter +Input [2]: [d_date_sk#111, d_year#112] +Condition : ((isnotnull(d_year#112) AND (d_year#112 = 2000)) AND isnotnull(d_date_sk#111)) + +(110) CometBroadcastExchange +Input [2]: [d_date_sk#111, d_year#112] +Arguments: [d_date_sk#111, d_year#112] + +(111) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [2]: [d_date_sk#111, d_year#112] +Arguments: [ss_sold_date_sk#102], [d_date_sk#111], Inner, BuildRight + +(112) CometProject +Input [13]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, d_date_sk#111, d_year#112] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] + +(113) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] + +(114) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] +Right output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_store_sk#97], [s_store_sk#113], Inner, BuildRight + +(115) CometProject +Input [14]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] + +(116) ReusedExchange [Reuses operator id: 43] +Output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(117) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] +Right output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_customer_sk#93], [c_customer_sk#116], Inner, BuildRight + +(118) CometProject +Input [18]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(119) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#122, d_year#123] + +(120) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Right output [2]: [d_date_sk#122, d_year#123] +Arguments: [c_first_sales_date_sk#121], [d_date_sk#122], Inner, BuildRight + +(121) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121, d_date_sk#122, d_year#123] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] + +(122) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#124, d_year#125] + +(123) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] +Right output [2]: [d_date_sk#124, d_year#125] +Arguments: [c_first_shipto_date_sk#120], [d_date_sk#124], Inner, BuildRight + +(124) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123, d_date_sk#124, d_year#125] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(125) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#126, cd_marital_status#127] + +(126) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_cdemo_sk#94], [cd_demo_sk#126], Inner, BuildRight + +(127) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] + +(128) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#128, cd_marital_status#129] + +(129) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] +Right output [2]: [cd_demo_sk#128, cd_marital_status#129] +Arguments: [c_current_cdemo_sk#117], [cd_demo_sk#128], Inner, NOT (cd_marital_status#127 = cd_marital_status#129), BuildRight + +(130) CometProject +Input [18]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127, cd_demo_sk#128, cd_marital_status#129] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(131) ReusedExchange [Reuses operator id: 64] +Output [1]: [p_promo_sk#130] + +(132) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [1]: [p_promo_sk#130] +Arguments: [ss_promo_sk#98], [p_promo_sk#130], Inner, BuildRight + +(133) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, p_promo_sk#130] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(134) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#131, hd_income_band_sk#132] + +(135) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_hdemo_sk#95], [hd_demo_sk#131], Inner, BuildRight + +(136) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] + +(137) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#133, hd_income_band_sk#134] + +(138) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] +Right output [2]: [hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [c_current_hdemo_sk#118], [hd_demo_sk#133], Inner, BuildRight + +(139) CometProject +Input [15]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] + +(140) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(141) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] +Right output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_addr_sk#96], [ca_address_sk#135], Inner, BuildRight + +(142) CometProject +Input [18]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(143) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(144) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Right output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [c_current_addr_sk#119], [ca_address_sk#140], Inner, BuildRight + +(145) CometProject +Input [21]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(146) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#145] + +(147) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#145] +Arguments: [hd_income_band_sk#132], [ib_income_band_sk#145], Inner, BuildRight + +(148) CometProject +Input [20]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#145] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(149) ReusedExchange [Reuses operator id: 64] +Output [1]: [ib_income_band_sk#146] + +(150) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#146] +Arguments: [hd_income_band_sk#134], [ib_income_band_sk#146], Inner, BuildRight + +(151) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#146] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(152) ReusedExchange [Reuses operator id: 92] +Output [2]: [i_item_sk#147, i_product_name#148] + +(153) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [2]: [i_item_sk#147, i_product_name#148] +Arguments: [ss_item_sk#92], [i_item_sk#147], Inner, BuildRight + +(154) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Arguments: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148], [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] + +(155) CometHashAggregate +Input [18]: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#99)), partial_sum(UnscaledValue(ss_list_price#100)), partial_sum(UnscaledValue(ss_coupon_amt#101))] + +(156) CometHashAggregate +Input [19]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125, count#71, sum#149, sum#150, sum#151] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#99)), sum(UnscaledValue(ss_list_price#100)), sum(UnscaledValue(ss_coupon_amt#101))] + +(157) CometExchange +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: hashpartitioning(item_sk#152, store_name#153, store_zip#154, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(158) CometSort +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159], [item_sk#152 ASC NULLS FIRST, store_name#153 ASC NULLS FIRST, store_zip#154 ASC NULLS FIRST] + +(159) CometSortMergeJoin +Left output [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Right output [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#76, store_name#77, store_zip#78], [item_sk#152, store_name#153, store_zip#154], Inner, (cnt#156 <= cnt#88) + +(160) CometProject +Input [25]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + +(161) CometColumnarExchange +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: rangepartitioning(product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST, s1#89 ASC NULLS FIRST, s1#157 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] + +(162) CometSort +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST, s1#89 ASC NULLS FIRST, s1#157 ASC NULLS FIRST] + +(163) ColumnarToRow [codegen id : 1] +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c68b3e5cbd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_datafusion/simplified.txt @@ -0,0 +1,165 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometColumnarExchange [product_name,store_name,cnt,s1,s1] #1 + CometProject [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometSortMergeJoin [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometSort [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #2 + CometHashAggregate [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometExchange [ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #4 + CometFilter [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + CometExchange [cs_item_sk] #5 + CometHashAggregate [cs_item_sk,sum,sum,isEmpty,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometProject [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_ext_list_price,cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSort [cs_item_sk,cs_order_number,cs_ext_list_price] + CometExchange [cs_item_sk,cs_order_number] #6 + CometProject [cs_item_sk,cs_order_number,cs_ext_list_price] + CometFilter [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometExchange [cr_item_sk,cr_order_number] #7 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #8 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #9 + CometFilter [s_store_sk,s_store_name,s_zip] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #11 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [d_date_sk,d_year] #11 + CometBroadcastExchange [cd_demo_sk,cd_marital_status] #12 + CometFilter [cd_demo_sk,cd_marital_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status] + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + CometBroadcastExchange [p_promo_sk] #13 + CometFilter [p_promo_sk] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #14 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_income_band_sk] + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + CometBroadcastExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometNativeScan: `spark_catalog`.`default`.`customer_address` [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ib_income_band_sk] #13 + ReusedExchange [ib_income_band_sk] #13 + CometBroadcastExchange [i_item_sk,i_product_name] #16 + CometProject [i_item_sk,i_product_name] + CometFilter [i_item_sk,i_current_price,i_color,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price,i_color,i_product_name] + CometSort [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #17 + CometHashAggregate [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + CometBroadcastExchange [d_date_sk,d_year] #18 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [p_promo_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ib_income_band_sk] #13 + ReusedExchange [ib_income_band_sk] #13 + ReusedExchange [i_item_sk,i_product_name] #16 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d8fe196f73 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/explain.txt @@ -0,0 +1,894 @@ +== Physical Plan == +* ColumnarToRow (165) ++- CometSort (164) + +- CometColumnarExchange (163) + +- CometProject (162) + +- CometSortMergeJoin (161) + :- CometSort (100) + : +- CometExchange (99) + : +- CometHashAggregate (98) + : +- CometHashAggregate (97) + : +- CometProject (96) + : +- CometBroadcastHashJoin (95) + : :- CometProject (90) + : : +- CometBroadcastHashJoin (89) + : : :- CometProject (87) + : : : +- CometBroadcastHashJoin (86) + : : : :- CometProject (82) + : : : : +- CometBroadcastHashJoin (81) + : : : : :- CometProject (79) + : : : : : +- CometBroadcastHashJoin (78) + : : : : : :- CometProject (74) + : : : : : : +- CometBroadcastHashJoin (73) + : : : : : : :- CometProject (71) + : : : : : : : +- CometBroadcastHashJoin (70) + : : : : : : : :- CometProject (66) + : : : : : : : : +- CometBroadcastHashJoin (65) + : : : : : : : : :- CometProject (61) + : : : : : : : : : +- CometBroadcastHashJoin (60) + : : : : : : : : : :- CometProject (58) + : : : : : : : : : : +- CometBroadcastHashJoin (57) + : : : : : : : : : : :- CometProject (53) + : : : : : : : : : : : +- CometBroadcastHashJoin (52) + : : : : : : : : : : : :- CometProject (50) + : : : : : : : : : : : : +- CometBroadcastHashJoin (49) + : : : : : : : : : : : : :- CometProject (45) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (44) + : : : : : : : : : : : : : :- CometProject (40) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (39) + : : : : : : : : : : : : : : :- CometProject (35) + : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (34) + : : : : : : : : : : : : : : : :- CometProject (30) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (29) + : : : : : : : : : : : : : : : : :- CometSort (10) + : : : : : : : : : : : : : : : : : +- CometExchange (9) + : : : : : : : : : : : : : : : : : +- CometProject (8) + : : : : : : : : : : : : : : : : : +- CometBroadcastHashJoin (7) + : : : : : : : : : : : : : : : : : :- CometBroadcastExchange (3) + : : : : : : : : : : : : : : : : : : +- CometFilter (2) + : : : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : : : : : : : : : : : : +- CometProject (6) + : : : : : : : : : : : : : : : : : +- CometFilter (5) + : : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.store_returns (4) + : : : : : : : : : : : : : : : : +- CometSort (28) + : : : : : : : : : : : : : : : : +- CometProject (27) + : : : : : : : : : : : : : : : : +- CometFilter (26) + : : : : : : : : : : : : : : : : +- CometHashAggregate (25) + : : : : : : : : : : : : : : : : +- CometExchange (24) + : : : : : : : : : : : : : : : : +- CometHashAggregate (23) + : : : : : : : : : : : : : : : : +- CometProject (22) + : : : : : : : : : : : : : : : : +- CometSortMergeJoin (21) + : : : : : : : : : : : : : : : : :- CometSort (15) + : : : : : : : : : : : : : : : : : +- CometExchange (14) + : : : : : : : : : : : : : : : : : +- CometProject (13) + : : : : : : : : : : : : : : : : : +- CometFilter (12) + : : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (11) + : : : : : : : : : : : : : : : : +- CometSort (20) + : : : : : : : : : : : : : : : : +- CometExchange (19) + : : : : : : : : : : : : : : : : +- CometProject (18) + : : : : : : : : : : : : : : : : +- CometFilter (17) + : : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_returns (16) + : : : : : : : : : : : : : : : +- CometBroadcastExchange (33) + : : : : : : : : : : : : : : : +- CometFilter (32) + : : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.date_dim (31) + : : : : : : : : : : : : : : +- CometBroadcastExchange (38) + : : : : : : : : : : : : : : +- CometFilter (37) + : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.store (36) + : : : : : : : : : : : : : +- CometBroadcastExchange (43) + : : : : : : : : : : : : : +- CometFilter (42) + : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.customer (41) + : : : : : : : : : : : : +- CometBroadcastExchange (48) + : : : : : : : : : : : : +- CometFilter (47) + : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.date_dim (46) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- CometBroadcastExchange (56) + : : : : : : : : : : +- CometFilter (55) + : : : : : : : : : : +- CometScan parquet spark_catalog.default.customer_demographics (54) + : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : +- CometBroadcastExchange (64) + : : : : : : : : +- CometFilter (63) + : : : : : : : : +- CometScan parquet spark_catalog.default.promotion (62) + : : : : : : : +- CometBroadcastExchange (69) + : : : : : : : +- CometFilter (68) + : : : : : : : +- CometScan parquet spark_catalog.default.household_demographics (67) + : : : : : : +- ReusedExchange (72) + : : : : : +- CometBroadcastExchange (77) + : : : : : +- CometFilter (76) + : : : : : +- CometScan parquet spark_catalog.default.customer_address (75) + : : : : +- ReusedExchange (80) + : : : +- CometBroadcastExchange (85) + : : : +- CometFilter (84) + : : : +- CometScan parquet spark_catalog.default.income_band (83) + : : +- ReusedExchange (88) + : +- CometBroadcastExchange (94) + : +- CometProject (93) + : +- CometFilter (92) + : +- CometScan parquet spark_catalog.default.item (91) + +- CometSort (160) + +- CometExchange (159) + +- CometHashAggregate (158) + +- CometHashAggregate (157) + +- CometProject (156) + +- CometBroadcastHashJoin (155) + :- CometProject (153) + : +- CometBroadcastHashJoin (152) + : :- CometProject (150) + : : +- CometBroadcastHashJoin (149) + : : :- CometProject (147) + : : : +- CometBroadcastHashJoin (146) + : : : :- CometProject (144) + : : : : +- CometBroadcastHashJoin (143) + : : : : :- CometProject (141) + : : : : : +- CometBroadcastHashJoin (140) + : : : : : :- CometProject (138) + : : : : : : +- CometBroadcastHashJoin (137) + : : : : : : :- CometProject (135) + : : : : : : : +- CometBroadcastHashJoin (134) + : : : : : : : :- CometProject (132) + : : : : : : : : +- CometBroadcastHashJoin (131) + : : : : : : : : :- CometProject (129) + : : : : : : : : : +- CometBroadcastHashJoin (128) + : : : : : : : : : :- CometProject (126) + : : : : : : : : : : +- CometBroadcastHashJoin (125) + : : : : : : : : : : :- CometProject (123) + : : : : : : : : : : : +- CometBroadcastHashJoin (122) + : : : : : : : : : : : :- CometProject (120) + : : : : : : : : : : : : +- CometBroadcastHashJoin (119) + : : : : : : : : : : : : :- CometProject (117) + : : : : : : : : : : : : : +- CometBroadcastHashJoin (116) + : : : : : : : : : : : : : :- CometProject (114) + : : : : : : : : : : : : : : +- CometBroadcastHashJoin (113) + : : : : : : : : : : : : : : :- CometProject (109) + : : : : : : : : : : : : : : : +- CometSortMergeJoin (108) + : : : : : : : : : : : : : : : :- CometSort (102) + : : : : : : : : : : : : : : : : +- ReusedExchange (101) + : : : : : : : : : : : : : : : +- CometSort (107) + : : : : : : : : : : : : : : : +- CometProject (106) + : : : : : : : : : : : : : : : +- CometFilter (105) + : : : : : : : : : : : : : : : +- CometHashAggregate (104) + : : : : : : : : : : : : : : : +- ReusedExchange (103) + : : : : : : : : : : : : : : +- CometBroadcastExchange (112) + : : : : : : : : : : : : : : +- CometFilter (111) + : : : : : : : : : : : : : : +- CometScan parquet spark_catalog.default.date_dim (110) + : : : : : : : : : : : : : +- ReusedExchange (115) + : : : : : : : : : : : : +- ReusedExchange (118) + : : : : : : : : : : : +- ReusedExchange (121) + : : : : : : : : : : +- ReusedExchange (124) + : : : : : : : : : +- ReusedExchange (127) + : : : : : : : : +- ReusedExchange (130) + : : : : : : : +- ReusedExchange (133) + : : : : : : +- ReusedExchange (136) + : : : : : +- ReusedExchange (139) + : : : : +- ReusedExchange (142) + : : : +- ReusedExchange (145) + : : +- ReusedExchange (148) + : +- ReusedExchange (151) + +- ReusedExchange (154) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) CometFilter +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(3) CometBroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(4) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) CometFilter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(6) CometProject +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Arguments: [sr_item_sk#13, sr_ticket_number#14], [sr_item_sk#13, sr_ticket_number#14] + +(7) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_ticket_number#8], [sr_item_sk#13, sr_ticket_number#14], Inner, BuildLeft + +(8) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(9) CometExchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(10) CometSort +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1 ASC NULLS FIRST] + +(11) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(12) CometFilter +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) CometProject +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(14) CometExchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(15) CometSort +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18], [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST] + +(16) CometScan parquet spark_catalog.default.catalog_returns +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(17) CometFilter +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(18) CometProject +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(19) CometExchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometSort +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Right output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_order_number#17], [cr_item_sk#20, cr_order_number#21], Inner + +(22) CometProject +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24], [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(23) CometHashAggregate +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(24) CometExchange +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(25) CometHashAggregate +Input [4]: [cs_item_sk#16, sum#26, sum#27, isEmpty#28] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] + +(26) CometFilter +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(27) CometProject +Input [3]: [cs_item_sk#16, sale#29, refund#30] +Arguments: [cs_item_sk#16], [cs_item_sk#16] + +(28) CometSort +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16], [cs_item_sk#16 ASC NULLS FIRST] + +(29) CometSortMergeJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [1]: [cs_item_sk#16] +Arguments: [ss_item_sk#1], [cs_item_sk#16], Inner + +(30) CometProject +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(31) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#31, d_year#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(32) CometFilter +Input [2]: [d_date_sk#31, d_year#32] +Condition : ((isnotnull(d_year#32) AND (d_year#32 = 1999)) AND isnotnull(d_date_sk#31)) + +(33) CometBroadcastExchange +Input [2]: [d_date_sk#31, d_year#32] +Arguments: [d_date_sk#31, d_year#32] + +(34) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Right output [2]: [d_date_sk#31, d_year#32] +Arguments: [ss_sold_date_sk#12], [d_date_sk#31], Inner, BuildRight + +(35) CometProject +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#31, d_year#32] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] + +(36) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(37) CometFilter +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Condition : ((isnotnull(s_store_sk#33) AND isnotnull(s_store_name#34)) AND isnotnull(s_zip#35)) + +(38) CometBroadcastExchange +Input [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [s_store_sk#33, s_store_name#34, s_zip#35] + +(39) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32] +Right output [3]: [s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_store_sk#6], [s_store_sk#33], Inner, BuildRight + +(40) CometProject +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_sk#33, s_store_name#34, s_zip#35] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35], [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] + +(41) CometScan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(42) CometFilter +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Condition : (((((isnotnull(c_customer_sk#36) AND isnotnull(c_first_sales_date_sk#41)) AND isnotnull(c_first_shipto_date_sk#40)) AND isnotnull(c_current_cdemo_sk#37)) AND isnotnull(c_current_hdemo_sk#38)) AND isnotnull(c_current_addr_sk#39)) + +(43) CometBroadcastExchange +Input [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(44) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35] +Right output [6]: [c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_customer_sk#2], [c_customer_sk#36], Inner, BuildRight + +(45) CometProject +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_customer_sk#36, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] + +(46) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#42, d_year#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(47) CometFilter +Input [2]: [d_date_sk#42, d_year#43] +Condition : isnotnull(d_date_sk#42) + +(48) CometBroadcastExchange +Input [2]: [d_date_sk#42, d_year#43] +Arguments: [d_date_sk#42, d_year#43] + +(49) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41] +Right output [2]: [d_date_sk#42, d_year#43] +Arguments: [c_first_sales_date_sk#41], [d_date_sk#42], Inner, BuildRight + +(50) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, c_first_sales_date_sk#41, d_date_sk#42, d_year#43] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#44, d_year#45] + +(52) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43] +Right output [2]: [d_date_sk#44, d_year#45] +Arguments: [c_first_shipto_date_sk#40], [d_date_sk#44], Inner, BuildRight + +(53) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, c_first_shipto_date_sk#40, d_year#43, d_date_sk#44, d_year#45] +Arguments: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(54) CometScan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#46, cd_marital_status#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(55) CometFilter +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Condition : (isnotnull(cd_demo_sk#46) AND isnotnull(cd_marital_status#47)) + +(56) CometBroadcastExchange +Input [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [cd_demo_sk#46, cd_marital_status#47] + +(57) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_cdemo_sk#3], [cd_demo_sk#46], Inner, BuildRight + +(58) CometProject +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_demo_sk#46, cd_marital_status#47] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#48, cd_marital_status#49] + +(60) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47] +Right output [2]: [cd_demo_sk#48, cd_marital_status#49] +Arguments: [c_current_cdemo_sk#37], [cd_demo_sk#48], Inner, NOT (cd_marital_status#47 = cd_marital_status#49), BuildRight + +(61) CometProject +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_cdemo_sk#37, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, cd_marital_status#47, cd_demo_sk#48, cd_marital_status#49] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(62) CometScan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(63) CometFilter +Input [1]: [p_promo_sk#50] +Condition : isnotnull(p_promo_sk#50) + +(64) CometBroadcastExchange +Input [1]: [p_promo_sk#50] +Arguments: [p_promo_sk#50] + +(65) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [1]: [p_promo_sk#50] +Arguments: [ss_promo_sk#7], [p_promo_sk#50], Inner, BuildRight + +(66) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, p_promo_sk#50] +Arguments: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45], [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] + +(67) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(68) CometFilter +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Condition : (isnotnull(hd_demo_sk#51) AND isnotnull(hd_income_band_sk#52)) + +(69) CometBroadcastExchange +Input [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [hd_demo_sk#51, hd_income_band_sk#52] + +(70) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45] +Right output [2]: [hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_hdemo_sk#4], [hd_demo_sk#51], Inner, BuildRight + +(71) CometProject +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_demo_sk#51, hd_income_band_sk#52] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] + +(72) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#53, hd_income_band_sk#54] + +(73) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52] +Right output [2]: [hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [c_current_hdemo_sk#38], [hd_demo_sk#53], Inner, BuildRight + +(74) CometProject +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_hdemo_sk#38, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_demo_sk#53, hd_income_band_sk#54] +Arguments: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54], [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] + +(75) CometScan parquet spark_catalog.default.customer_address +Output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(76) CometFilter +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Condition : isnotnull(ca_address_sk#55) + +(77) CometBroadcastExchange +Input [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(78) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54] +Right output [5]: [ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_addr_sk#5], [ca_address_sk#55], Inner, BuildRight + +(79) CometProject +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_address_sk#55, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] + +(80) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(81) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59] +Right output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [c_current_addr_sk#39], [ca_address_sk#60], Inner, BuildRight + +(82) CometProject +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, c_current_addr_sk#39, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(83) CometScan parquet spark_catalog.default.income_band +Output [1]: [ib_income_band_sk#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(84) CometFilter +Input [1]: [ib_income_band_sk#65] +Condition : isnotnull(ib_income_band_sk#65) + +(85) CometBroadcastExchange +Input [1]: [ib_income_band_sk#65] +Arguments: [ib_income_band_sk#65] + +(86) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#65] +Arguments: [hd_income_band_sk#52], [ib_income_band_sk#65], Inner, BuildRight + +(87) CometProject +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#52, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#65] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(88) ReusedExchange [Reuses operator id: 85] +Output [1]: [ib_income_band_sk#66] + +(89) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [1]: [ib_income_band_sk#66] +Arguments: [hd_income_band_sk#54], [ib_income_band_sk#66], Inner, BuildRight + +(90) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, hd_income_band_sk#54, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ib_income_band_sk#66] +Arguments: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64], [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(91) CometScan parquet spark_catalog.default.item +Output [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(92) CometFilter +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Condition : ((((((isnotnull(i_current_price#68) AND i_color#69 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#68 >= 64.00)) AND (i_current_price#68 <= 74.00)) AND (i_current_price#68 >= 65.00)) AND (i_current_price#68 <= 79.00)) AND isnotnull(i_item_sk#67)) + +(93) CometProject +Input [4]: [i_item_sk#67, i_current_price#68, i_color#69, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70], [i_item_sk#67, i_product_name#70] + +(94) CometBroadcastExchange +Input [2]: [i_item_sk#67, i_product_name#70] +Arguments: [i_item_sk#67, i_product_name#70] + +(95) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Right output [2]: [i_item_sk#67, i_product_name#70] +Arguments: [ss_item_sk#1], [i_item_sk#67], Inner, BuildRight + +(96) CometProject +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, s_store_name#34, s_zip#35, d_year#43, d_year#45, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Arguments: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70], [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] + +(97) CometHashAggregate +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#32, d_year#43, d_year#45, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, i_item_sk#67, i_product_name#70] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] + +(98) CometHashAggregate +Input [19]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45, count#71, sum#72, sum#73, sum#74] +Keys [15]: [i_product_name#70, i_item_sk#67, s_store_name#34, s_zip#35, ca_street_number#56, ca_street_name#57, ca_city#58, ca_zip#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, d_year#32, d_year#43, d_year#45] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] + +(99) CometExchange +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: hashpartitioning(item_sk#76, store_name#77, store_zip#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(100) CometSort +Input [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Arguments: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91], [item_sk#76 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, store_zip#78 ASC NULLS FIRST] + +(101) ReusedExchange [Reuses operator id: 9] +Output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(102) CometSort +Input [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92 ASC NULLS FIRST] + +(103) ReusedExchange [Reuses operator id: 24] +Output [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] + +(104) CometHashAggregate +Input [4]: [cs_item_sk#103, sum#104, sum#105, isEmpty#106] +Keys [1]: [cs_item_sk#103] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#107)), sum(((cr_refunded_cash#108 + cr_reversed_charge#109) + cr_store_credit#110))] + +(105) CometFilter +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Condition : ((isnotnull(sale#29) AND isnotnull(refund#30)) AND (cast(sale#29 as decimal(21,2)) > (2 * refund#30))) + +(106) CometProject +Input [3]: [cs_item_sk#103, sale#29, refund#30] +Arguments: [cs_item_sk#103], [cs_item_sk#103] + +(107) CometSort +Input [1]: [cs_item_sk#103] +Arguments: [cs_item_sk#103], [cs_item_sk#103 ASC NULLS FIRST] + +(108) CometSortMergeJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [1]: [cs_item_sk#103] +Arguments: [ss_item_sk#92], [cs_item_sk#103], Inner + +(109) CometProject +Input [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, cs_item_sk#103] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] + +(110) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#111, d_year#112] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(111) CometFilter +Input [2]: [d_date_sk#111, d_year#112] +Condition : ((isnotnull(d_year#112) AND (d_year#112 = 2000)) AND isnotnull(d_date_sk#111)) + +(112) CometBroadcastExchange +Input [2]: [d_date_sk#111, d_year#112] +Arguments: [d_date_sk#111, d_year#112] + +(113) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102] +Right output [2]: [d_date_sk#111, d_year#112] +Arguments: [ss_sold_date_sk#102], [d_date_sk#111], Inner, BuildRight + +(114) CometProject +Input [13]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, ss_sold_date_sk#102, d_date_sk#111, d_year#112] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] + +(115) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] + +(116) CometBroadcastHashJoin +Left output [11]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112] +Right output [3]: [s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_store_sk#97], [s_store_sk#113], Inner, BuildRight + +(117) CometProject +Input [14]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_store_sk#97, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_sk#113, s_store_name#114, s_zip#115] +Arguments: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115], [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] + +(118) ReusedExchange [Reuses operator id: 43] +Output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(119) CometBroadcastHashJoin +Left output [12]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115] +Right output [6]: [c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_customer_sk#93], [c_customer_sk#116], Inner, BuildRight + +(120) CometProject +Input [18]: [ss_item_sk#92, ss_customer_sk#93, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_customer_sk#116, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] + +(121) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#122, d_year#123] + +(122) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121] +Right output [2]: [d_date_sk#122, d_year#123] +Arguments: [c_first_sales_date_sk#121], [d_date_sk#122], Inner, BuildRight + +(123) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, c_first_sales_date_sk#121, d_date_sk#122, d_year#123] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] + +(124) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#124, d_year#125] + +(125) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123] +Right output [2]: [d_date_sk#124, d_year#125] +Arguments: [c_first_shipto_date_sk#120], [d_date_sk#124], Inner, BuildRight + +(126) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, c_first_shipto_date_sk#120, d_year#123, d_date_sk#124, d_year#125] +Arguments: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(127) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#126, cd_marital_status#127] + +(128) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_cdemo_sk#94], [cd_demo_sk#126], Inner, BuildRight + +(129) CometProject +Input [18]: [ss_item_sk#92, ss_cdemo_sk#94, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_demo_sk#126, cd_marital_status#127] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] + +(130) ReusedExchange [Reuses operator id: 56] +Output [2]: [cd_demo_sk#128, cd_marital_status#129] + +(131) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127] +Right output [2]: [cd_demo_sk#128, cd_marital_status#129] +Arguments: [c_current_cdemo_sk#117], [cd_demo_sk#128], Inner, NOT (cd_marital_status#127 = cd_marital_status#129), BuildRight + +(132) CometProject +Input [18]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_cdemo_sk#117, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, cd_marital_status#127, cd_demo_sk#128, cd_marital_status#129] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(133) ReusedExchange [Reuses operator id: 64] +Output [1]: [p_promo_sk#130] + +(134) CometBroadcastHashJoin +Left output [14]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [1]: [p_promo_sk#130] +Arguments: [ss_promo_sk#98], [p_promo_sk#130], Inner, BuildRight + +(135) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_promo_sk#98, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, p_promo_sk#130] +Arguments: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125], [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] + +(136) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#131, hd_income_band_sk#132] + +(137) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125] +Right output [2]: [hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_hdemo_sk#95], [hd_demo_sk#131], Inner, BuildRight + +(138) CometProject +Input [15]: [ss_item_sk#92, ss_hdemo_sk#95, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_demo_sk#131, hd_income_band_sk#132] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] + +(139) ReusedExchange [Reuses operator id: 69] +Output [2]: [hd_demo_sk#133, hd_income_band_sk#134] + +(140) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132] +Right output [2]: [hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [c_current_hdemo_sk#118], [hd_demo_sk#133], Inner, BuildRight + +(141) CometProject +Input [15]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_hdemo_sk#118, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_demo_sk#133, hd_income_band_sk#134] +Arguments: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134], [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] + +(142) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(143) CometBroadcastHashJoin +Left output [13]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134] +Right output [5]: [ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_addr_sk#96], [ca_address_sk#135], Inner, BuildRight + +(144) CometProject +Input [18]: [ss_item_sk#92, ss_addr_sk#96, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_address_sk#135, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] + +(145) ReusedExchange [Reuses operator id: 77] +Output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(146) CometBroadcastHashJoin +Left output [16]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139] +Right output [5]: [ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [c_current_addr_sk#119], [ca_address_sk#140], Inner, BuildRight + +(147) CometProject +Input [21]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, c_current_addr_sk#119, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_address_sk#140, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(148) ReusedExchange [Reuses operator id: 85] +Output [1]: [ib_income_band_sk#145] + +(149) CometBroadcastHashJoin +Left output [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#145] +Arguments: [hd_income_band_sk#132], [ib_income_band_sk#145], Inner, BuildRight + +(150) CometProject +Input [20]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#132, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#145] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(151) ReusedExchange [Reuses operator id: 85] +Output [1]: [ib_income_band_sk#146] + +(152) CometBroadcastHashJoin +Left output [18]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [1]: [ib_income_band_sk#146] +Arguments: [hd_income_band_sk#134], [ib_income_band_sk#146], Inner, BuildRight + +(153) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, hd_income_band_sk#134, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, ib_income_band_sk#146] +Arguments: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144], [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] + +(154) ReusedExchange [Reuses operator id: 94] +Output [2]: [i_item_sk#147, i_product_name#148] + +(155) CometBroadcastHashJoin +Left output [17]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144] +Right output [2]: [i_item_sk#147, i_product_name#148] +Arguments: [ss_item_sk#92], [i_item_sk#147], Inner, BuildRight + +(156) CometProject +Input [19]: [ss_item_sk#92, ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, s_store_name#114, s_zip#115, d_year#123, d_year#125, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Arguments: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148], [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] + +(157) CometHashAggregate +Input [18]: [ss_wholesale_cost#99, ss_list_price#100, ss_coupon_amt#101, d_year#112, d_year#123, d_year#125, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, i_item_sk#147, i_product_name#148] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#99)), partial_sum(UnscaledValue(ss_list_price#100)), partial_sum(UnscaledValue(ss_coupon_amt#101))] + +(158) CometHashAggregate +Input [19]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125, count#71, sum#149, sum#150, sum#151] +Keys [15]: [i_product_name#148, i_item_sk#147, s_store_name#114, s_zip#115, ca_street_number#136, ca_street_name#137, ca_city#138, ca_zip#139, ca_street_number#141, ca_street_name#142, ca_city#143, ca_zip#144, d_year#112, d_year#123, d_year#125] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#99)), sum(UnscaledValue(ss_list_price#100)), sum(UnscaledValue(ss_coupon_amt#101))] + +(159) CometExchange +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: hashpartitioning(item_sk#152, store_name#153, store_zip#154, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(160) CometSort +Input [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159], [item_sk#152 ASC NULLS FIRST, store_name#153 ASC NULLS FIRST, store_zip#154 ASC NULLS FIRST] + +(161) CometSortMergeJoin +Left output [17]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91] +Right output [8]: [item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [item_sk#76, store_name#77, store_zip#78], [item_sk#152, store_name#153, store_zip#154], Inner, (cnt#156 <= cnt#88) + +(162) CometProject +Input [25]: [product_name#75, item_sk#76, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, item_sk#152, store_name#153, store_zip#154, syear#155, cnt#156, s1#157, s2#158, s3#159] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + +(163) CometColumnarExchange +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: rangepartitioning(product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST, s1#89 ASC NULLS FIRST, s1#157 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=7] + +(164) CometSort +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] +Arguments: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156], [product_name#75 ASC NULLS FIRST, store_name#77 ASC NULLS FIRST, cnt#156 ASC NULLS FIRST, s1#89 ASC NULLS FIRST, s1#157 ASC NULLS FIRST] + +(165) ColumnarToRow [codegen id : 1] +Input [21]: [product_name#75, store_name#77, store_zip#78, b_street_number#79, b_streen_name#80, b_city#81, b_zip#82, c_street_number#83, c_street_name#84, c_city#85, c_zip#86, syear#87, cnt#88, s1#89, s2#90, s3#91, s1#157, s2#158, s3#159, syear#155, cnt#156] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..0bf9320776 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q64.native_iceberg_compat/simplified.txt @@ -0,0 +1,167 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometColumnarExchange [product_name,store_name,cnt,s1,s1] #1 + CometProject [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + CometSortMergeJoin [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometSort [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #2 + CometHashAggregate [product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometExchange [ss_item_sk] #3 + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometBroadcastExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #4 + CometFilter [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + CometExchange [cs_item_sk] #5 + CometHashAggregate [cs_item_sk,sum,sum,isEmpty,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometProject [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_ext_list_price,cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometSort [cs_item_sk,cs_order_number,cs_ext_list_price] + CometExchange [cs_item_sk,cs_order_number] #6 + CometProject [cs_item_sk,cs_order_number,cs_ext_list_price] + CometFilter [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometExchange [cr_item_sk,cr_order_number] #7 + CometProject [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + CometFilter [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #8 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [s_store_sk,s_store_name,s_zip] #9 + CometFilter [s_store_sk,s_store_name,s_zip] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + CometBroadcastExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + CometFilter [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #11 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [d_date_sk,d_year] #11 + CometBroadcastExchange [cd_demo_sk,cd_marital_status] #12 + CometFilter [cd_demo_sk,cd_marital_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + CometBroadcastExchange [p_promo_sk] #13 + CometFilter [p_promo_sk] + CometScan parquet spark_catalog.default.promotion [p_promo_sk] + CometBroadcastExchange [hd_demo_sk,hd_income_band_sk] #14 + CometFilter [hd_demo_sk,hd_income_band_sk] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + CometBroadcastExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + CometFilter [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometScan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + CometBroadcastExchange [ib_income_band_sk] #16 + CometFilter [ib_income_band_sk] + CometScan parquet spark_catalog.default.income_band [ib_income_band_sk] + ReusedExchange [ib_income_band_sk] #16 + CometBroadcastExchange [i_item_sk,i_product_name] #17 + CometProject [i_item_sk,i_product_name] + CometFilter [i_item_sk,i_current_price,i_color,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_color,i_product_name] + CometSort [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3] + CometExchange [item_sk,store_name,store_zip] #18 + CometHashAggregate [item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt))] + CometHashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + CometProject [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ib_income_band_sk] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_demo_sk,hd_income_band_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,p_promo_sk] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_demo_sk,cd_marital_status] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_sk,s_store_name,s_zip] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk,cs_item_sk] + CometSort [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + CometSort [cs_item_sk] + CometProject [cs_item_sk] + CometFilter [cs_item_sk,sale,refund] + CometHashAggregate [cs_item_sk,sale,refund,sum,sum,isEmpty,sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))] + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + CometBroadcastExchange [d_date_sk,d_year] #19 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [d_date_sk,d_year] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + ReusedExchange [p_promo_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + ReusedExchange [ib_income_band_sk] #16 + ReusedExchange [ib_income_band_sk] #16 + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/explain.txt new file mode 100644 index 0000000000..bd7991a8ef --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/explain.txt @@ -0,0 +1,385 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- Window (69) + +- WindowGroupLimit (68) + +- * Sort (67) + +- Exchange (66) + +- WindowGroupLimit (65) + +- * ColumnarToRow (64) + +- CometSort (63) + +- CometUnion (62) + :- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (14) + :- CometHashAggregate (26) + : +- CometExchange (25) + : +- CometHashAggregate (24) + : +- CometHashAggregate (23) + : +- ReusedExchange (22) + :- CometHashAggregate (31) + : +- CometExchange (30) + : +- CometHashAggregate (29) + : +- CometHashAggregate (28) + : +- ReusedExchange (27) + :- CometHashAggregate (36) + : +- CometExchange (35) + : +- CometHashAggregate (34) + : +- CometHashAggregate (33) + : +- ReusedExchange (32) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometHashAggregate (38) + : +- ReusedExchange (37) + :- CometHashAggregate (46) + : +- CometExchange (45) + : +- CometHashAggregate (44) + : +- CometHashAggregate (43) + : +- ReusedExchange (42) + :- CometHashAggregate (51) + : +- CometExchange (50) + : +- CometHashAggregate (49) + : +- CometHashAggregate (48) + : +- ReusedExchange (47) + :- CometHashAggregate (56) + : +- CometExchange (55) + : +- CometHashAggregate (54) + : +- CometHashAggregate (53) + : +- ReusedExchange (52) + +- CometHashAggregate (61) + +- CometExchange (60) + +- CometHashAggregate (59) + +- CometHashAggregate (58) + +- ReusedExchange (57) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(4) CometFilter +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1212)) AND (d_month_seq#7 <= 1223)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10], [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(6) CometBroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Right output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10], [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(10) CometFilter +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(11) CometBroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Right output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(13) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] +Arguments: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12], [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] + +(14) CometNativeScan: `spark_catalog`.`default`.`item` +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(15) CometFilter +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(16) CometBroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(17) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Right output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_item_sk#1], [i_item_sk#13], Inner, BuildRight + +(18) CometProject +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17], [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(19) CometHashAggregate +Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(20) CometExchange +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#18, isEmpty#19] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#18, isEmpty#19] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(22) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum#28, isEmpty#29] + +(23) CometHashAggregate +Input [10]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum#28, isEmpty#29] +Keys [8]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27] +Functions [1]: [sum(coalesce((ss_sales_price#30 * cast(ss_quantity#31 as decimal(10,0))), 0.00))] + +(24) CometHashAggregate +Input [8]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, sumsales#32] +Keys [7]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26] +Functions [1]: [partial_sum(sumsales#32)] + +(25) CometExchange +Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, sum#33, isEmpty#34] +Arguments: hashpartitioning(i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(26) CometHashAggregate +Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, sum#33, isEmpty#34] +Keys [7]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26] +Functions [1]: [sum(sumsales#32)] + +(27) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#41, s_store_id#42, sum#43, isEmpty#44] + +(28) CometHashAggregate +Input [10]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#41, s_store_id#42, sum#43, isEmpty#44] +Keys [8]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#41, s_store_id#42] +Functions [1]: [sum(coalesce((ss_sales_price#45 * cast(ss_quantity#46 as decimal(10,0))), 0.00))] + +(29) CometHashAggregate +Input [7]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, sumsales#47] +Keys [6]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40] +Functions [1]: [partial_sum(sumsales#47)] + +(30) CometExchange +Input [8]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, sum#48, isEmpty#49] +Arguments: hashpartitioning(i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(31) CometHashAggregate +Input [8]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, sum#48, isEmpty#49] +Keys [6]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40] +Functions [1]: [sum(sumsales#47)] + +(32) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#55, d_moy#56, s_store_id#57, sum#58, isEmpty#59] + +(33) CometHashAggregate +Input [10]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#55, d_moy#56, s_store_id#57, sum#58, isEmpty#59] +Keys [8]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#55, d_moy#56, s_store_id#57] +Functions [1]: [sum(coalesce((ss_sales_price#60 * cast(ss_quantity#61 as decimal(10,0))), 0.00))] + +(34) CometHashAggregate +Input [6]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, sumsales#62] +Keys [5]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54] +Functions [1]: [partial_sum(sumsales#62)] + +(35) CometExchange +Input [7]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, sum#63, isEmpty#64] +Arguments: hashpartitioning(i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(36) CometHashAggregate +Input [7]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, sum#63, isEmpty#64] +Keys [5]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54] +Functions [1]: [sum(sumsales#62)] + +(37) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#69, d_qoy#70, d_moy#71, s_store_id#72, sum#73, isEmpty#74] + +(38) CometHashAggregate +Input [10]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#69, d_qoy#70, d_moy#71, s_store_id#72, sum#73, isEmpty#74] +Keys [8]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#69, d_qoy#70, d_moy#71, s_store_id#72] +Functions [1]: [sum(coalesce((ss_sales_price#75 * cast(ss_quantity#76 as decimal(10,0))), 0.00))] + +(39) CometHashAggregate +Input [5]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, sumsales#77] +Keys [4]: [i_category#65, i_class#66, i_brand#67, i_product_name#68] +Functions [1]: [partial_sum(sumsales#77)] + +(40) CometExchange +Input [6]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, sum#78, isEmpty#79] +Arguments: hashpartitioning(i_category#65, i_class#66, i_brand#67, i_product_name#68, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(41) CometHashAggregate +Input [6]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, sum#78, isEmpty#79] +Keys [4]: [i_category#65, i_class#66, i_brand#67, i_product_name#68] +Functions [1]: [sum(sumsales#77)] + +(42) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#80, i_class#81, i_brand#82, i_product_name#83, d_year#84, d_qoy#85, d_moy#86, s_store_id#87, sum#88, isEmpty#89] + +(43) CometHashAggregate +Input [10]: [i_category#80, i_class#81, i_brand#82, i_product_name#83, d_year#84, d_qoy#85, d_moy#86, s_store_id#87, sum#88, isEmpty#89] +Keys [8]: [i_category#80, i_class#81, i_brand#82, i_product_name#83, d_year#84, d_qoy#85, d_moy#86, s_store_id#87] +Functions [1]: [sum(coalesce((ss_sales_price#90 * cast(ss_quantity#91 as decimal(10,0))), 0.00))] + +(44) CometHashAggregate +Input [4]: [i_category#80, i_class#81, i_brand#82, sumsales#92] +Keys [3]: [i_category#80, i_class#81, i_brand#82] +Functions [1]: [partial_sum(sumsales#92)] + +(45) CometExchange +Input [5]: [i_category#80, i_class#81, i_brand#82, sum#93, isEmpty#94] +Arguments: hashpartitioning(i_category#80, i_class#81, i_brand#82, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(46) CometHashAggregate +Input [5]: [i_category#80, i_class#81, i_brand#82, sum#93, isEmpty#94] +Keys [3]: [i_category#80, i_class#81, i_brand#82] +Functions [1]: [sum(sumsales#92)] + +(47) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#95, i_class#96, i_brand#97, i_product_name#98, d_year#99, d_qoy#100, d_moy#101, s_store_id#102, sum#103, isEmpty#104] + +(48) CometHashAggregate +Input [10]: [i_category#95, i_class#96, i_brand#97, i_product_name#98, d_year#99, d_qoy#100, d_moy#101, s_store_id#102, sum#103, isEmpty#104] +Keys [8]: [i_category#95, i_class#96, i_brand#97, i_product_name#98, d_year#99, d_qoy#100, d_moy#101, s_store_id#102] +Functions [1]: [sum(coalesce((ss_sales_price#105 * cast(ss_quantity#106 as decimal(10,0))), 0.00))] + +(49) CometHashAggregate +Input [3]: [i_category#95, i_class#96, sumsales#107] +Keys [2]: [i_category#95, i_class#96] +Functions [1]: [partial_sum(sumsales#107)] + +(50) CometExchange +Input [4]: [i_category#95, i_class#96, sum#108, isEmpty#109] +Arguments: hashpartitioning(i_category#95, i_class#96, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(51) CometHashAggregate +Input [4]: [i_category#95, i_class#96, sum#108, isEmpty#109] +Keys [2]: [i_category#95, i_class#96] +Functions [1]: [sum(sumsales#107)] + +(52) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#110, i_class#111, i_brand#112, i_product_name#113, d_year#114, d_qoy#115, d_moy#116, s_store_id#117, sum#118, isEmpty#119] + +(53) CometHashAggregate +Input [10]: [i_category#110, i_class#111, i_brand#112, i_product_name#113, d_year#114, d_qoy#115, d_moy#116, s_store_id#117, sum#118, isEmpty#119] +Keys [8]: [i_category#110, i_class#111, i_brand#112, i_product_name#113, d_year#114, d_qoy#115, d_moy#116, s_store_id#117] +Functions [1]: [sum(coalesce((ss_sales_price#120 * cast(ss_quantity#121 as decimal(10,0))), 0.00))] + +(54) CometHashAggregate +Input [2]: [i_category#110, sumsales#122] +Keys [1]: [i_category#110] +Functions [1]: [partial_sum(sumsales#122)] + +(55) CometExchange +Input [3]: [i_category#110, sum#123, isEmpty#124] +Arguments: hashpartitioning(i_category#110, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(56) CometHashAggregate +Input [3]: [i_category#110, sum#123, isEmpty#124] +Keys [1]: [i_category#110] +Functions [1]: [sum(sumsales#122)] + +(57) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#125, i_class#126, i_brand#127, i_product_name#128, d_year#129, d_qoy#130, d_moy#131, s_store_id#132, sum#133, isEmpty#134] + +(58) CometHashAggregate +Input [10]: [i_category#125, i_class#126, i_brand#127, i_product_name#128, d_year#129, d_qoy#130, d_moy#131, s_store_id#132, sum#133, isEmpty#134] +Keys [8]: [i_category#125, i_class#126, i_brand#127, i_product_name#128, d_year#129, d_qoy#130, d_moy#131, s_store_id#132] +Functions [1]: [sum(coalesce((ss_sales_price#135 * cast(ss_quantity#136 as decimal(10,0))), 0.00))] + +(59) CometHashAggregate +Input [1]: [sumsales#137] +Keys: [] +Functions [1]: [partial_sum(sumsales#137)] + +(60) CometExchange +Input [2]: [sum#138, isEmpty#139] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(61) CometHashAggregate +Input [2]: [sum#138, isEmpty#139] +Keys: [] +Functions [1]: [sum(sumsales#137)] + +(62) CometUnion +Child 0 Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Child 1 Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#141, sumsales#142] +Child 2 Input [9]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#143, s_store_id#144, sumsales#145] +Child 3 Input [9]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#146, d_moy#147, s_store_id#148, sumsales#149] +Child 4 Input [9]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#150, d_qoy#151, d_moy#152, s_store_id#153, sumsales#154] +Child 5 Input [9]: [i_category#80, i_class#81, i_brand#82, i_product_name#155, d_year#156, d_qoy#157, d_moy#158, s_store_id#159, sumsales#160] +Child 6 Input [9]: [i_category#95, i_class#96, i_brand#161, i_product_name#162, d_year#163, d_qoy#164, d_moy#165, s_store_id#166, sumsales#167] +Child 7 Input [9]: [i_category#110, i_class#168, i_brand#169, i_product_name#170, d_year#171, d_qoy#172, d_moy#173, s_store_id#174, sumsales#175] +Child 8 Input [9]: [i_category#176, i_class#177, i_brand#178, i_product_name#179, d_year#180, d_qoy#181, d_moy#182, s_store_id#183, sumsales#184] + +(63) CometSort +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140], [i_category#16 ASC NULLS FIRST, sumsales#140 DESC NULLS LAST] + +(64) ColumnarToRow [codegen id : 1] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] + +(65) WindowGroupLimit +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16], [sumsales#140 DESC NULLS LAST], rank(sumsales#140), 100, Partial + +(66) Exchange +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) Sort [codegen id : 2] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16 ASC NULLS FIRST, sumsales#140 DESC NULLS LAST], false, 0 + +(68) WindowGroupLimit +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16], [sumsales#140 DESC NULLS LAST], rank(sumsales#140), 100, Final + +(69) Window +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [rank(sumsales#140) windowspecdefinition(i_category#16, sumsales#140 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#185], [i_category#16], [sumsales#140 DESC NULLS LAST] + +(70) Filter [codegen id : 3] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140, rk#185] +Condition : (rk#185 <= 100) + +(71) TakeOrderedAndProject +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140, rk#185] +Arguments: 100, [i_category#16 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#140 ASC NULLS FIRST, rk#185 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140, rk#185] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6bf8ad0746 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_datafusion/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (3) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (2) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales] + CometUnion [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,ss_sales_price,ss_quantity] + CometProject [ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy,d_qoy] + CometFilter [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy,d_qoy] #3 + CometProject [d_date_sk,d_year,d_moy,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #5 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #6 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #7 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year] #8 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name] #9 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,sumsales,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand] #10 + CometHashAggregate [i_category,i_class,i_brand,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,sumsales,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class] #11 + CometHashAggregate [i_category,i_class,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,sumsales,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category] #12 + CometHashAggregate [i_category,sum,isEmpty,sumsales] + CometHashAggregate [i_category,sumsales,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange #13 + CometHashAggregate [sum,isEmpty,sumsales] + CometHashAggregate [sumsales,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..f499e23c60 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/explain.txt @@ -0,0 +1,398 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- Window (69) + +- WindowGroupLimit (68) + +- * Sort (67) + +- Exchange (66) + +- WindowGroupLimit (65) + +- * ColumnarToRow (64) + +- CometSort (63) + +- CometUnion (62) + :- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometProject (18) + : +- CometBroadcastHashJoin (17) + : :- CometProject (13) + : : +- CometBroadcastHashJoin (12) + : : :- CometProject (8) + : : : +- CometBroadcastHashJoin (7) + : : : :- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometBroadcastExchange (6) + : : : +- CometProject (5) + : : : +- CometFilter (4) + : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : +- CometBroadcastExchange (11) + : : +- CometFilter (10) + : : +- CometScan parquet spark_catalog.default.store (9) + : +- CometBroadcastExchange (16) + : +- CometFilter (15) + : +- CometScan parquet spark_catalog.default.item (14) + :- CometHashAggregate (26) + : +- CometExchange (25) + : +- CometHashAggregate (24) + : +- CometHashAggregate (23) + : +- ReusedExchange (22) + :- CometHashAggregate (31) + : +- CometExchange (30) + : +- CometHashAggregate (29) + : +- CometHashAggregate (28) + : +- ReusedExchange (27) + :- CometHashAggregate (36) + : +- CometExchange (35) + : +- CometHashAggregate (34) + : +- CometHashAggregate (33) + : +- ReusedExchange (32) + :- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometHashAggregate (38) + : +- ReusedExchange (37) + :- CometHashAggregate (46) + : +- CometExchange (45) + : +- CometHashAggregate (44) + : +- CometHashAggregate (43) + : +- ReusedExchange (42) + :- CometHashAggregate (51) + : +- CometExchange (50) + : +- CometHashAggregate (49) + : +- CometHashAggregate (48) + : +- ReusedExchange (47) + :- CometHashAggregate (56) + : +- CometExchange (55) + : +- CometHashAggregate (54) + : +- CometHashAggregate (53) + : +- ReusedExchange (52) + +- CometHashAggregate (61) + +- CometExchange (60) + +- CometHashAggregate (59) + +- CometHashAggregate (58) + +- ReusedExchange (57) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1212)) AND (d_month_seq#7 <= 1223)) AND isnotnull(d_date_sk#6)) + +(5) CometProject +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10], [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(6) CometBroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(7) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Right output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_sold_date_sk#5], [d_date_sk#6], Inner, BuildRight + +(8) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10], [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] + +(9) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_id#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(11) CometBroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: [s_store_sk#11, s_store_id#12] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Right output [2]: [s_store_sk#11, s_store_id#12] +Arguments: [ss_store_sk#2], [s_store_sk#11], Inner, BuildRight + +(13) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] +Arguments: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12], [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] + +(14) CometScan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) CometFilter +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(16) CometBroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(17) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Right output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_item_sk#1], [i_item_sk#13], Inner, BuildRight + +(18) CometProject +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17], [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(19) CometHashAggregate +Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(20) CometExchange +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#18, isEmpty#19] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(21) CometHashAggregate +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#18, isEmpty#19] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] + +(22) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum#28, isEmpty#29] + +(23) CometHashAggregate +Input [10]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum#28, isEmpty#29] +Keys [8]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27] +Functions [1]: [sum(coalesce((ss_sales_price#30 * cast(ss_quantity#31 as decimal(10,0))), 0.00))] + +(24) CometHashAggregate +Input [8]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, sumsales#32] +Keys [7]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26] +Functions [1]: [partial_sum(sumsales#32)] + +(25) CometExchange +Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, sum#33, isEmpty#34] +Arguments: hashpartitioning(i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(26) CometHashAggregate +Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, sum#33, isEmpty#34] +Keys [7]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26] +Functions [1]: [sum(sumsales#32)] + +(27) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#41, s_store_id#42, sum#43, isEmpty#44] + +(28) CometHashAggregate +Input [10]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#41, s_store_id#42, sum#43, isEmpty#44] +Keys [8]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#41, s_store_id#42] +Functions [1]: [sum(coalesce((ss_sales_price#45 * cast(ss_quantity#46 as decimal(10,0))), 0.00))] + +(29) CometHashAggregate +Input [7]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, sumsales#47] +Keys [6]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40] +Functions [1]: [partial_sum(sumsales#47)] + +(30) CometExchange +Input [8]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, sum#48, isEmpty#49] +Arguments: hashpartitioning(i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(31) CometHashAggregate +Input [8]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, sum#48, isEmpty#49] +Keys [6]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40] +Functions [1]: [sum(sumsales#47)] + +(32) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#55, d_moy#56, s_store_id#57, sum#58, isEmpty#59] + +(33) CometHashAggregate +Input [10]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#55, d_moy#56, s_store_id#57, sum#58, isEmpty#59] +Keys [8]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#55, d_moy#56, s_store_id#57] +Functions [1]: [sum(coalesce((ss_sales_price#60 * cast(ss_quantity#61 as decimal(10,0))), 0.00))] + +(34) CometHashAggregate +Input [6]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, sumsales#62] +Keys [5]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54] +Functions [1]: [partial_sum(sumsales#62)] + +(35) CometExchange +Input [7]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, sum#63, isEmpty#64] +Arguments: hashpartitioning(i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(36) CometHashAggregate +Input [7]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, sum#63, isEmpty#64] +Keys [5]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54] +Functions [1]: [sum(sumsales#62)] + +(37) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#69, d_qoy#70, d_moy#71, s_store_id#72, sum#73, isEmpty#74] + +(38) CometHashAggregate +Input [10]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#69, d_qoy#70, d_moy#71, s_store_id#72, sum#73, isEmpty#74] +Keys [8]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#69, d_qoy#70, d_moy#71, s_store_id#72] +Functions [1]: [sum(coalesce((ss_sales_price#75 * cast(ss_quantity#76 as decimal(10,0))), 0.00))] + +(39) CometHashAggregate +Input [5]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, sumsales#77] +Keys [4]: [i_category#65, i_class#66, i_brand#67, i_product_name#68] +Functions [1]: [partial_sum(sumsales#77)] + +(40) CometExchange +Input [6]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, sum#78, isEmpty#79] +Arguments: hashpartitioning(i_category#65, i_class#66, i_brand#67, i_product_name#68, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(41) CometHashAggregate +Input [6]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, sum#78, isEmpty#79] +Keys [4]: [i_category#65, i_class#66, i_brand#67, i_product_name#68] +Functions [1]: [sum(sumsales#77)] + +(42) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#80, i_class#81, i_brand#82, i_product_name#83, d_year#84, d_qoy#85, d_moy#86, s_store_id#87, sum#88, isEmpty#89] + +(43) CometHashAggregate +Input [10]: [i_category#80, i_class#81, i_brand#82, i_product_name#83, d_year#84, d_qoy#85, d_moy#86, s_store_id#87, sum#88, isEmpty#89] +Keys [8]: [i_category#80, i_class#81, i_brand#82, i_product_name#83, d_year#84, d_qoy#85, d_moy#86, s_store_id#87] +Functions [1]: [sum(coalesce((ss_sales_price#90 * cast(ss_quantity#91 as decimal(10,0))), 0.00))] + +(44) CometHashAggregate +Input [4]: [i_category#80, i_class#81, i_brand#82, sumsales#92] +Keys [3]: [i_category#80, i_class#81, i_brand#82] +Functions [1]: [partial_sum(sumsales#92)] + +(45) CometExchange +Input [5]: [i_category#80, i_class#81, i_brand#82, sum#93, isEmpty#94] +Arguments: hashpartitioning(i_category#80, i_class#81, i_brand#82, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(46) CometHashAggregate +Input [5]: [i_category#80, i_class#81, i_brand#82, sum#93, isEmpty#94] +Keys [3]: [i_category#80, i_class#81, i_brand#82] +Functions [1]: [sum(sumsales#92)] + +(47) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#95, i_class#96, i_brand#97, i_product_name#98, d_year#99, d_qoy#100, d_moy#101, s_store_id#102, sum#103, isEmpty#104] + +(48) CometHashAggregate +Input [10]: [i_category#95, i_class#96, i_brand#97, i_product_name#98, d_year#99, d_qoy#100, d_moy#101, s_store_id#102, sum#103, isEmpty#104] +Keys [8]: [i_category#95, i_class#96, i_brand#97, i_product_name#98, d_year#99, d_qoy#100, d_moy#101, s_store_id#102] +Functions [1]: [sum(coalesce((ss_sales_price#105 * cast(ss_quantity#106 as decimal(10,0))), 0.00))] + +(49) CometHashAggregate +Input [3]: [i_category#95, i_class#96, sumsales#107] +Keys [2]: [i_category#95, i_class#96] +Functions [1]: [partial_sum(sumsales#107)] + +(50) CometExchange +Input [4]: [i_category#95, i_class#96, sum#108, isEmpty#109] +Arguments: hashpartitioning(i_category#95, i_class#96, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(51) CometHashAggregate +Input [4]: [i_category#95, i_class#96, sum#108, isEmpty#109] +Keys [2]: [i_category#95, i_class#96] +Functions [1]: [sum(sumsales#107)] + +(52) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#110, i_class#111, i_brand#112, i_product_name#113, d_year#114, d_qoy#115, d_moy#116, s_store_id#117, sum#118, isEmpty#119] + +(53) CometHashAggregate +Input [10]: [i_category#110, i_class#111, i_brand#112, i_product_name#113, d_year#114, d_qoy#115, d_moy#116, s_store_id#117, sum#118, isEmpty#119] +Keys [8]: [i_category#110, i_class#111, i_brand#112, i_product_name#113, d_year#114, d_qoy#115, d_moy#116, s_store_id#117] +Functions [1]: [sum(coalesce((ss_sales_price#120 * cast(ss_quantity#121 as decimal(10,0))), 0.00))] + +(54) CometHashAggregate +Input [2]: [i_category#110, sumsales#122] +Keys [1]: [i_category#110] +Functions [1]: [partial_sum(sumsales#122)] + +(55) CometExchange +Input [3]: [i_category#110, sum#123, isEmpty#124] +Arguments: hashpartitioning(i_category#110, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(56) CometHashAggregate +Input [3]: [i_category#110, sum#123, isEmpty#124] +Keys [1]: [i_category#110] +Functions [1]: [sum(sumsales#122)] + +(57) ReusedExchange [Reuses operator id: 20] +Output [10]: [i_category#125, i_class#126, i_brand#127, i_product_name#128, d_year#129, d_qoy#130, d_moy#131, s_store_id#132, sum#133, isEmpty#134] + +(58) CometHashAggregate +Input [10]: [i_category#125, i_class#126, i_brand#127, i_product_name#128, d_year#129, d_qoy#130, d_moy#131, s_store_id#132, sum#133, isEmpty#134] +Keys [8]: [i_category#125, i_class#126, i_brand#127, i_product_name#128, d_year#129, d_qoy#130, d_moy#131, s_store_id#132] +Functions [1]: [sum(coalesce((ss_sales_price#135 * cast(ss_quantity#136 as decimal(10,0))), 0.00))] + +(59) CometHashAggregate +Input [1]: [sumsales#137] +Keys: [] +Functions [1]: [partial_sum(sumsales#137)] + +(60) CometExchange +Input [2]: [sum#138, isEmpty#139] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(61) CometHashAggregate +Input [2]: [sum#138, isEmpty#139] +Keys: [] +Functions [1]: [sum(sumsales#137)] + +(62) CometUnion +Child 0 Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Child 1 Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#141, sumsales#142] +Child 2 Input [9]: [i_category#35, i_class#36, i_brand#37, i_product_name#38, d_year#39, d_qoy#40, d_moy#143, s_store_id#144, sumsales#145] +Child 3 Input [9]: [i_category#50, i_class#51, i_brand#52, i_product_name#53, d_year#54, d_qoy#146, d_moy#147, s_store_id#148, sumsales#149] +Child 4 Input [9]: [i_category#65, i_class#66, i_brand#67, i_product_name#68, d_year#150, d_qoy#151, d_moy#152, s_store_id#153, sumsales#154] +Child 5 Input [9]: [i_category#80, i_class#81, i_brand#82, i_product_name#155, d_year#156, d_qoy#157, d_moy#158, s_store_id#159, sumsales#160] +Child 6 Input [9]: [i_category#95, i_class#96, i_brand#161, i_product_name#162, d_year#163, d_qoy#164, d_moy#165, s_store_id#166, sumsales#167] +Child 7 Input [9]: [i_category#110, i_class#168, i_brand#169, i_product_name#170, d_year#171, d_qoy#172, d_moy#173, s_store_id#174, sumsales#175] +Child 8 Input [9]: [i_category#176, i_class#177, i_brand#178, i_product_name#179, d_year#180, d_qoy#181, d_moy#182, s_store_id#183, sumsales#184] + +(63) CometSort +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140], [i_category#16 ASC NULLS FIRST, sumsales#140 DESC NULLS LAST] + +(64) ColumnarToRow [codegen id : 1] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] + +(65) WindowGroupLimit +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16], [sumsales#140 DESC NULLS LAST], rank(sumsales#140), 100, Partial + +(66) Exchange +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) Sort [codegen id : 2] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16 ASC NULLS FIRST, sumsales#140 DESC NULLS LAST], false, 0 + +(68) WindowGroupLimit +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [i_category#16], [sumsales#140 DESC NULLS LAST], rank(sumsales#140), 100, Final + +(69) Window +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140] +Arguments: [rank(sumsales#140) windowspecdefinition(i_category#16, sumsales#140 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#185], [i_category#16], [sumsales#140 DESC NULLS LAST] + +(70) Filter [codegen id : 3] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140, rk#185] +Condition : (rk#185 <= 100) + +(71) TakeOrderedAndProject +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140, rk#185] +Arguments: 100, [i_category#16 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#140 ASC NULLS FIRST, rk#185 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#140, rk#185] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..aea039f186 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q67a.native_iceberg_compat/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (3) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (2) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WindowGroupLimit [i_category,sumsales] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales] + CometUnion [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,ss_sales_price,ss_quantity] + CometProject [ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_brand,i_class,i_category,i_product_name] + CometBroadcastHashJoin [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_item_sk,i_brand,i_class,i_category,i_product_name] + CometProject [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year,d_moy,d_qoy] + CometFilter [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year,d_moy,d_qoy] #3 + CometProject [d_date_sk,d_year,d_moy,d_qoy] + CometFilter [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + CometBroadcastExchange [s_store_sk,s_store_id] #4 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk,i_brand,i_class,i_category,i_product_name] #5 + CometFilter [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #6 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #7 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name,d_year] #8 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand,i_product_name] #9 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,i_product_name,sumsales,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class,i_brand] #10 + CometHashAggregate [i_category,i_class,i_brand,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,i_brand,sumsales,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category,i_class] #11 + CometHashAggregate [i_category,i_class,sum,isEmpty,sumsales] + CometHashAggregate [i_category,i_class,sumsales,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange [i_category] #12 + CometHashAggregate [i_category,sum,isEmpty,sumsales] + CometHashAggregate [i_category,sumsales,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + CometHashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty,sum(sumsales)] + CometExchange #13 + CometHashAggregate [sum,isEmpty,sumsales] + CometHashAggregate [sumsales,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty,sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00))] + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/explain.txt new file mode 100644 index 0000000000..c29a4118cd --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/explain.txt @@ -0,0 +1,324 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * Project (57) + +- Window (56) + +- * Sort (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Exchange (52) + +- * HashAggregate (51) + +- Union (50) + :- * HashAggregate (39) + : +- Exchange (38) + : +- * HashAggregate (37) + : +- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * ColumnarToRow (9) + : : +- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- BroadcastExchange (34) + : +- * BroadcastHashJoin LeftSemi BuildRight (33) + : :- * ColumnarToRow (12) + : : +- CometFilter (11) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (10) + : +- BroadcastExchange (32) + : +- * Project (31) + : +- * Filter (30) + : +- Window (29) + : +- WindowGroupLimit (28) + : +- * ColumnarToRow (27) + : +- CometSort (26) + : +- CometHashAggregate (25) + : +- CometExchange (24) + : +- CometHashAggregate (23) + : +- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometFilter (14) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (13) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometNativeScan: `spark_catalog`.`default`.`store` (15) + : +- ReusedExchange (20) + :- * HashAggregate (44) + : +- Exchange (43) + : +- * HashAggregate (42) + : +- * HashAggregate (41) + : +- ReusedExchange (40) + +- * HashAggregate (49) + +- Exchange (48) + +- * HashAggregate (47) + +- * HashAggregate (46) + +- ReusedExchange (45) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Arguments: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) ColumnarToRow [codegen id : 4] +Input [2]: [ss_store_sk#1, ss_net_profit#2] + +(10) CometNativeScan: `spark_catalog`.`default`.`store` +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: [s_store_sk#6, s_county#7, s_state#8] + +(11) CometFilter +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Arguments: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(14) CometFilter +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(15) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(16) CometFilter +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(18) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Right output [2]: [s_store_sk#12, s_state#13] +Arguments: [ss_store_sk#9], [s_store_sk#12], Inner, BuildRight + +(19) CometProject +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] +Arguments: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13], [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Right output [1]: [d_date_sk#14] +Arguments: [ss_sold_date_sk#11], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] +Arguments: [ss_net_profit#10, s_state#13], [ss_net_profit#10, s_state#13] + +(23) CometHashAggregate +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] + +(24) CometExchange +Input [2]: [s_state#13, sum#15] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [2]: [s_state#13, sum#15] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] + +(26) CometSort +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13, _w0#16, s_state#13], [s_state#13 ASC NULLS FIRST, _w0#16 DESC NULLS LAST] + +(27) ColumnarToRow [codegen id : 1] +Input [3]: [s_state#13, _w0#16, s_state#13] + +(28) WindowGroupLimit +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13], [_w0#16 DESC NULLS LAST], rank(_w0#16), 5, Final + +(29) Window +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [rank(_w0#16) windowspecdefinition(s_state#13, _w0#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#17], [s_state#13], [_w0#16 DESC NULLS LAST] + +(30) Filter [codegen id : 2] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] +Condition : (ranking#17 <= 5) + +(31) Project [codegen id : 2] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] + +(32) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(33) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(34) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 4] +Output [3]: [ss_net_profit#2, s_county#7, s_state#8] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(37) HashAggregate [codegen id : 4] +Input [3]: [ss_net_profit#2, s_county#7, s_state#8] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [s_state#8, s_county#7, sum#19] + +(38) Exchange +Input [3]: [s_state#8, s_county#7, sum#19] +Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(39) HashAggregate [codegen id : 5] +Input [3]: [s_state#8, s_county#7, sum#19] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#20] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#20,17,2) as decimal(27,2)) AS total_sum#21, s_state#8, s_county#7, 0 AS g_state#22, 0 AS g_county#23, 0 AS lochierarchy#24] + +(40) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_state#25, s_county#26, sum#27] + +(41) HashAggregate [codegen id : 10] +Input [3]: [s_state#25, s_county#26, sum#27] +Keys [2]: [s_state#25, s_county#26] +Functions [1]: [sum(UnscaledValue(ss_net_profit#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#28))#20] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#28))#20,17,2) AS total_sum#29, s_state#25] + +(42) HashAggregate [codegen id : 10] +Input [2]: [total_sum#29, s_state#25] +Keys [1]: [s_state#25] +Functions [1]: [partial_sum(total_sum#29)] +Aggregate Attributes [2]: [sum#30, isEmpty#31] +Results [3]: [s_state#25, sum#32, isEmpty#33] + +(43) Exchange +Input [3]: [s_state#25, sum#32, isEmpty#33] +Arguments: hashpartitioning(s_state#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(44) HashAggregate [codegen id : 11] +Input [3]: [s_state#25, sum#32, isEmpty#33] +Keys [1]: [s_state#25] +Functions [1]: [sum(total_sum#29)] +Aggregate Attributes [1]: [sum(total_sum#29)#34] +Results [6]: [sum(total_sum#29)#34 AS total_sum#35, s_state#25, null AS s_county#36, 0 AS g_state#37, 1 AS g_county#38, 1 AS lochierarchy#39] + +(45) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_state#40, s_county#41, sum#42] + +(46) HashAggregate [codegen id : 16] +Input [3]: [s_state#40, s_county#41, sum#42] +Keys [2]: [s_state#40, s_county#41] +Functions [1]: [sum(UnscaledValue(ss_net_profit#43))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#43))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#43))#20,17,2) AS total_sum#44] + +(47) HashAggregate [codegen id : 16] +Input [1]: [total_sum#44] +Keys: [] +Functions [1]: [partial_sum(total_sum#44)] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [2]: [sum#47, isEmpty#48] + +(48) Exchange +Input [2]: [sum#47, isEmpty#48] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(49) HashAggregate [codegen id : 17] +Input [2]: [sum#47, isEmpty#48] +Keys: [] +Functions [1]: [sum(total_sum#44)] +Aggregate Attributes [1]: [sum(total_sum#44)#49] +Results [6]: [sum(total_sum#44)#49 AS total_sum#50, null AS s_state#51, null AS s_county#52, 1 AS g_state#53, 1 AS g_county#54, 2 AS lochierarchy#55] + +(50) Union + +(51) HashAggregate [codegen id : 18] +Input [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Keys [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] + +(52) Exchange +Input [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Arguments: hashpartitioning(total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(53) HashAggregate [codegen id : 19] +Input [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Keys [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, CASE WHEN (g_county#23 = 0) THEN s_state#8 END AS _w0#56] + +(54) Exchange +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56] +Arguments: hashpartitioning(lochierarchy#24, _w0#56, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(55) Sort [codegen id : 20] +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56] +Arguments: [lochierarchy#24 ASC NULLS FIRST, _w0#56 ASC NULLS FIRST, total_sum#21 DESC NULLS LAST], false, 0 + +(56) Window +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56] +Arguments: [rank(total_sum#21) windowspecdefinition(lochierarchy#24, _w0#56, total_sum#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#57], [lochierarchy#24, _w0#56], [total_sum#21 DESC NULLS LAST] + +(57) Project [codegen id : 21] +Output [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, rank_within_parent#57] +Input [6]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56, rank_within_parent#57] + +(58) TakeOrderedAndProject +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, rank_within_parent#57] +Arguments: 100, [lochierarchy#24 DESC NULLS LAST, CASE WHEN (lochierarchy#24 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#57 ASC NULLS FIRST], [total_sum#21, s_state#8, s_county#7, lochierarchy#24, rank_within_parent#57] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..03fbe0bb70 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_datafusion/simplified.txt @@ -0,0 +1,86 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (21) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (20) + Sort [lochierarchy,_w0,total_sum] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (19) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] + InputAdapter + Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 + WholeStageCodegen (18) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + InputAdapter + Exchange [s_state,s_county] #3 + WholeStageCodegen (4) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + BroadcastHashJoin [s_state,s_state] + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_county,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WindowGroupLimit [s_state,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [s_state,_w0] + CometHashAggregate [s_state,_w0,sum,sum(UnscaledValue(ss_net_profit))] + CometExchange [s_state] #7 + CometHashAggregate [s_state,sum,ss_net_profit] + CometProject [ss_net_profit,s_state] + CometBroadcastHashJoin [ss_net_profit,ss_sold_date_sk,s_state,d_date_sk] + CometProject [ss_net_profit,ss_sold_date_sk,s_state] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,s_store_sk,s_state] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_state] #8 + CometFilter [s_store_sk,s_state] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_state] + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (11) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #9 + WholeStageCodegen (10) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange #10 + WholeStageCodegen (16) + HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3abc1daa05 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/explain.txt @@ -0,0 +1,341 @@ +== Physical Plan == +TakeOrderedAndProject (58) ++- * Project (57) + +- Window (56) + +- * Sort (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Exchange (52) + +- * HashAggregate (51) + +- Union (50) + :- * HashAggregate (39) + : +- Exchange (38) + : +- * HashAggregate (37) + : +- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * ColumnarToRow (9) + : : +- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- BroadcastExchange (34) + : +- * BroadcastHashJoin LeftSemi BuildRight (33) + : :- * ColumnarToRow (12) + : : +- CometFilter (11) + : : +- CometScan parquet spark_catalog.default.store (10) + : +- BroadcastExchange (32) + : +- * Project (31) + : +- * Filter (30) + : +- Window (29) + : +- WindowGroupLimit (28) + : +- * ColumnarToRow (27) + : +- CometSort (26) + : +- CometHashAggregate (25) + : +- CometExchange (24) + : +- CometHashAggregate (23) + : +- CometProject (22) + : +- CometBroadcastHashJoin (21) + : :- CometProject (19) + : : +- CometBroadcastHashJoin (18) + : : :- CometFilter (14) + : : : +- CometScan parquet spark_catalog.default.store_sales (13) + : : +- CometBroadcastExchange (17) + : : +- CometFilter (16) + : : +- CometScan parquet spark_catalog.default.store (15) + : +- ReusedExchange (20) + :- * HashAggregate (44) + : +- Exchange (43) + : +- * HashAggregate (42) + : +- * HashAggregate (41) + : +- ReusedExchange (40) + +- * HashAggregate (49) + +- Exchange (48) + +- * HashAggregate (47) + +- * HashAggregate (46) + +- ReusedExchange (45) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ss_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] +Arguments: [ss_store_sk#1, ss_net_profit#2], [ss_store_sk#1, ss_net_profit#2] + +(9) ColumnarToRow [codegen id : 4] +Input [2]: [ss_store_sk#1, ss_net_profit#2] + +(10) CometScan parquet spark_catalog.default.store +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) CometFilter +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(14) CometFilter +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(15) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_state#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(16) CometFilter +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(17) CometBroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: [s_store_sk#12, s_state#13] + +(18) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Right output [2]: [s_store_sk#12, s_state#13] +Arguments: [ss_store_sk#9], [s_store_sk#12], Inner, BuildRight + +(19) CometProject +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] +Arguments: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13], [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] + +(20) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(21) CometBroadcastHashJoin +Left output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Right output [1]: [d_date_sk#14] +Arguments: [ss_sold_date_sk#11], [d_date_sk#14], Inner, BuildRight + +(22) CometProject +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] +Arguments: [ss_net_profit#10, s_state#13], [ss_net_profit#10, s_state#13] + +(23) CometHashAggregate +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] + +(24) CometExchange +Input [2]: [s_state#13, sum#15] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(25) CometHashAggregate +Input [2]: [s_state#13, sum#15] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] + +(26) CometSort +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13, _w0#16, s_state#13], [s_state#13 ASC NULLS FIRST, _w0#16 DESC NULLS LAST] + +(27) ColumnarToRow [codegen id : 1] +Input [3]: [s_state#13, _w0#16, s_state#13] + +(28) WindowGroupLimit +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [s_state#13], [_w0#16 DESC NULLS LAST], rank(_w0#16), 5, Final + +(29) Window +Input [3]: [s_state#13, _w0#16, s_state#13] +Arguments: [rank(_w0#16) windowspecdefinition(s_state#13, _w0#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#17], [s_state#13], [_w0#16 DESC NULLS LAST] + +(30) Filter [codegen id : 2] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] +Condition : (ranking#17 <= 5) + +(31) Project [codegen id : 2] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#16, s_state#13, ranking#17] + +(32) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] + +(33) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(34) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(35) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 4] +Output [3]: [ss_net_profit#2, s_county#7, s_state#8] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(37) HashAggregate [codegen id : 4] +Input [3]: [ss_net_profit#2, s_county#7, s_state#8] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [s_state#8, s_county#7, sum#19] + +(38) Exchange +Input [3]: [s_state#8, s_county#7, sum#19] +Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(39) HashAggregate [codegen id : 5] +Input [3]: [s_state#8, s_county#7, sum#19] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#20] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#20,17,2) as decimal(27,2)) AS total_sum#21, s_state#8, s_county#7, 0 AS g_state#22, 0 AS g_county#23, 0 AS lochierarchy#24] + +(40) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_state#25, s_county#26, sum#27] + +(41) HashAggregate [codegen id : 10] +Input [3]: [s_state#25, s_county#26, sum#27] +Keys [2]: [s_state#25, s_county#26] +Functions [1]: [sum(UnscaledValue(ss_net_profit#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#28))#20] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#28))#20,17,2) AS total_sum#29, s_state#25] + +(42) HashAggregate [codegen id : 10] +Input [2]: [total_sum#29, s_state#25] +Keys [1]: [s_state#25] +Functions [1]: [partial_sum(total_sum#29)] +Aggregate Attributes [2]: [sum#30, isEmpty#31] +Results [3]: [s_state#25, sum#32, isEmpty#33] + +(43) Exchange +Input [3]: [s_state#25, sum#32, isEmpty#33] +Arguments: hashpartitioning(s_state#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(44) HashAggregate [codegen id : 11] +Input [3]: [s_state#25, sum#32, isEmpty#33] +Keys [1]: [s_state#25] +Functions [1]: [sum(total_sum#29)] +Aggregate Attributes [1]: [sum(total_sum#29)#34] +Results [6]: [sum(total_sum#29)#34 AS total_sum#35, s_state#25, null AS s_county#36, 0 AS g_state#37, 1 AS g_county#38, 1 AS lochierarchy#39] + +(45) ReusedExchange [Reuses operator id: 38] +Output [3]: [s_state#40, s_county#41, sum#42] + +(46) HashAggregate [codegen id : 16] +Input [3]: [s_state#40, s_county#41, sum#42] +Keys [2]: [s_state#40, s_county#41] +Functions [1]: [sum(UnscaledValue(ss_net_profit#43))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#43))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#43))#20,17,2) AS total_sum#44] + +(47) HashAggregate [codegen id : 16] +Input [1]: [total_sum#44] +Keys: [] +Functions [1]: [partial_sum(total_sum#44)] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [2]: [sum#47, isEmpty#48] + +(48) Exchange +Input [2]: [sum#47, isEmpty#48] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(49) HashAggregate [codegen id : 17] +Input [2]: [sum#47, isEmpty#48] +Keys: [] +Functions [1]: [sum(total_sum#44)] +Aggregate Attributes [1]: [sum(total_sum#44)#49] +Results [6]: [sum(total_sum#44)#49 AS total_sum#50, null AS s_state#51, null AS s_county#52, 1 AS g_state#53, 1 AS g_county#54, 2 AS lochierarchy#55] + +(50) Union + +(51) HashAggregate [codegen id : 18] +Input [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Keys [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] + +(52) Exchange +Input [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Arguments: hashpartitioning(total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(53) HashAggregate [codegen id : 19] +Input [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Keys [6]: [total_sum#21, s_state#8, s_county#7, g_state#22, g_county#23, lochierarchy#24] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, CASE WHEN (g_county#23 = 0) THEN s_state#8 END AS _w0#56] + +(54) Exchange +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56] +Arguments: hashpartitioning(lochierarchy#24, _w0#56, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(55) Sort [codegen id : 20] +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56] +Arguments: [lochierarchy#24 ASC NULLS FIRST, _w0#56 ASC NULLS FIRST, total_sum#21 DESC NULLS LAST], false, 0 + +(56) Window +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56] +Arguments: [rank(total_sum#21) windowspecdefinition(lochierarchy#24, _w0#56, total_sum#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#57], [lochierarchy#24, _w0#56], [total_sum#21 DESC NULLS LAST] + +(57) Project [codegen id : 21] +Output [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, rank_within_parent#57] +Input [6]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, _w0#56, rank_within_parent#57] + +(58) TakeOrderedAndProject +Input [5]: [total_sum#21, s_state#8, s_county#7, lochierarchy#24, rank_within_parent#57] +Arguments: 100, [lochierarchy#24 DESC NULLS LAST, CASE WHEN (lochierarchy#24 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#57 ASC NULLS FIRST], [total_sum#21, s_state#8, s_county#7, lochierarchy#24, rank_within_parent#57] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a7f6a0cdf0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q70a.native_iceberg_compat/simplified.txt @@ -0,0 +1,86 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (21) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (20) + Sort [lochierarchy,_w0,total_sum] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (19) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] + InputAdapter + Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 + WholeStageCodegen (18) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + InputAdapter + Exchange [s_state,s_county] #3 + WholeStageCodegen (4) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + ColumnarToRow + InputAdapter + CometProject [ss_store_sk,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + BroadcastHashJoin [s_state,s_state] + ColumnarToRow + InputAdapter + CometFilter [s_store_sk,s_county,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WindowGroupLimit [s_state,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [s_state,_w0] + CometHashAggregate [s_state,_w0,sum,sum(UnscaledValue(ss_net_profit))] + CometExchange [s_state] #7 + CometHashAggregate [s_state,sum,ss_net_profit] + CometProject [ss_net_profit,s_state] + CometBroadcastHashJoin [ss_net_profit,ss_sold_date_sk,s_state,d_date_sk] + CometProject [ss_net_profit,ss_sold_date_sk,s_state] + CometBroadcastHashJoin [ss_store_sk,ss_net_profit,ss_sold_date_sk,s_store_sk,s_state] + CometFilter [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [s_store_sk,s_state] #8 + CometFilter [s_store_sk,s_state] + CometScan parquet spark_catalog.default.store [s_store_sk,s_state] + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (11) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #9 + WholeStageCodegen (10) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange #10 + WholeStageCodegen (16) + HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/explain.txt new file mode 100644 index 0000000000..bdc0aa2e1e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/explain.txt @@ -0,0 +1,323 @@ +== Physical Plan == +* ColumnarToRow (62) ++- CometTakeOrderedAndProject (61) + +- CometHashAggregate (60) + +- CometExchange (59) + +- CometHashAggregate (58) + +- CometProject (57) + +- CometSortMergeJoin (56) + :- CometSort (50) + : +- CometExchange (49) + : +- CometProject (48) + : +- CometBroadcastHashJoin (47) + : :- CometProject (43) + : : +- CometBroadcastHashJoin (42) + : : :- CometProject (38) + : : : +- CometBroadcastHashJoin (37) + : : : :- CometProject (33) + : : : : +- CometBroadcastHashJoin (32) + : : : : :- CometProject (27) + : : : : : +- CometBroadcastHashJoin (26) + : : : : : :- CometProject (21) + : : : : : : +- CometBroadcastHashJoin (20) + : : : : : : :- CometProject (15) + : : : : : : : +- CometBroadcastHashJoin (14) + : : : : : : : :- CometProject (12) + : : : : : : : : +- CometBroadcastHashJoin (11) + : : : : : : : : :- CometProject (7) + : : : : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : : : : :- CometFilter (2) + : : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : : : : : : +- CometBroadcastExchange (5) + : : : : : : : : : +- CometFilter (4) + : : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`inventory` (3) + : : : : : : : : +- CometBroadcastExchange (10) + : : : : : : : : +- CometFilter (9) + : : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`warehouse` (8) + : : : : : : : +- ReusedExchange (13) + : : : : : : +- CometBroadcastExchange (19) + : : : : : : +- CometProject (18) + : : : : : : +- CometFilter (17) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer_demographics` (16) + : : : : : +- CometBroadcastExchange (25) + : : : : : +- CometProject (24) + : : : : : +- CometFilter (23) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`household_demographics` (22) + : : : : +- CometBroadcastExchange (31) + : : : : +- CometProject (30) + : : : : +- CometFilter (29) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (28) + : : : +- CometBroadcastExchange (36) + : : : +- CometFilter (35) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (34) + : : +- CometBroadcastExchange (41) + : : +- CometFilter (40) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (39) + : +- CometBroadcastExchange (46) + : +- CometFilter (45) + : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (44) + +- CometSort (55) + +- CometExchange (54) + +- CometProject (53) + +- CometFilter (52) + +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (51) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometNativeScan: `spark_catalog`.`default`.`inventory` +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(4) CometFilter +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(5) CometBroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Right output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_item_sk#4], [inv_item_sk#9], Inner, (inv_quantity_on_hand#11 < cs_quantity#7), BuildRight + +(7) CometProject +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] + +(8) CometNativeScan: `spark_catalog`.`default`.`warehouse` +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(9) CometFilter +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(11) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Right output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [inv_warehouse_sk#10], [w_warehouse_sk#13], Inner, BuildRight + +(12) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] + +(13) ReusedExchange [Reuses operator id: 10] +Output [2]: [i_item_sk#15, i_item_desc#16] + +(14) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Right output [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [cs_item_sk#4], [i_item_sk#15], Inner, BuildRight + +(15) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(16) CometNativeScan: `spark_catalog`.`default`.`customer_demographics` +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17, cd_marital_status#18] + +(17) CometFilter +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = M)) AND isnotnull(cd_demo_sk#17)) + +(18) CometProject +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17], [cd_demo_sk#17] + +(19) CometBroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: [cd_demo_sk#17] + +(20) CometBroadcastHashJoin +Left output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [cd_demo_sk#17] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#17], Inner, BuildRight + +(21) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] +Arguments: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(22) CometNativeScan: `spark_catalog`.`default`.`household_demographics` +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19, hd_buy_potential#20] + +(23) CometFilter +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = 1001-5000 )) AND isnotnull(hd_demo_sk#19)) + +(24) CometProject +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19], [hd_demo_sk#19] + +(25) CometBroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: [hd_demo_sk#19] + +(26) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [hd_demo_sk#19] +Arguments: [cs_bill_hdemo_sk#3], [hd_demo_sk#19], Inner, BuildRight + +(27) CometProject +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(28) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(29) CometFilter +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 2001)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(30) CometProject +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23], [d_date_sk#21, d_date#22, d_week_seq#23] + +(31) CometBroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23] + +(32) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_sold_date_sk#8], [d_date_sk#21], Inner, BuildRight + +(33) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(34) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(35) CometFilter +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(36) CometBroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(37) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_week_seq#23, inv_date_sk#12], [d_week_seq#26, d_date_sk#25], Inner, BuildRight + +(38) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(39) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(40) CometFilter +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(41) CometBroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(42) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#27, d_date#28] +Arguments: [cs_ship_date_sk#1], [d_date_sk#27], Inner, (d_date#28 > date_add(d_date#22, 5)), BuildRight + +(43) CometProject +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] +Arguments: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(44) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(45) CometFilter +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(46) CometBroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(47) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [1]: [p_promo_sk#29] +Arguments: [cs_promo_sk#5], [p_promo_sk#29], LeftOuter, BuildRight + +(48) CometProject +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(49) CometExchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(50) CometSort +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST] + +(51) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(52) CometFilter +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(53) CometProject +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30, cr_order_number#31] + +(54) CometExchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(55) CometSort +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST] + +(56) CometSortMergeJoin +Left output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cs_item_sk#4, cs_order_number#6], [cr_item_sk#30, cr_order_number#31], LeftOuter + +(57) CometProject +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] +Arguments: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(58) CometHashAggregate +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] + +(59) CometExchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(60) CometHashAggregate +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] + +(61) CometTakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_cnt#36 DESC NULLS LAST,i_item_desc#16 ASC NULLS FIRST,w_warehouse_name#14 ASC NULLS FIRST,d_week_seq#23 ASC NULLS FIRST], output=[i_item_desc#16,w_warehouse_name#14,d_week_seq#23,no_promo#34,promo#35,total_cnt#36]), [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36], 100, [total_cnt#36 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + +(62) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/simplified.txt new file mode 100644 index 0000000000..e70defef63 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt] + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt,count,count(1)] + CometExchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] + CometProject [w_warehouse_name,i_item_desc,d_week_seq] + CometSortMergeJoin [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,cr_item_sk,cr_order_number] + CometSort [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometExchange [cs_item_sk,cs_order_number] #2 + CometProject [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,p_promo_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_date] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,d_date_sk,d_date,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,hd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,cd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_sk,i_item_desc] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometFilter [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] #3 + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometNativeScan: `spark_catalog`.`default`.`inventory` [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometNativeScan: `spark_catalog`.`default`.`warehouse` [w_warehouse_sk,w_warehouse_name] + ReusedExchange [i_item_sk,i_item_desc] #4 + CometBroadcastExchange [cd_demo_sk] #5 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_marital_status] + CometNativeScan: `spark_catalog`.`default`.`customer_demographics` [cd_demo_sk,cd_marital_status] + CometBroadcastExchange [hd_demo_sk] #6 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometNativeScan: `spark_catalog`.`default`.`household_demographics` [hd_demo_sk,hd_buy_potential] + CometBroadcastExchange [d_date_sk,d_date,d_week_seq] #7 + CometProject [d_date_sk,d_date,d_week_seq] + CometFilter [d_date_sk,d_date,d_week_seq,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date,d_week_seq,d_year] + CometBroadcastExchange [d_date_sk,d_week_seq] #8 + CometFilter [d_date_sk,d_week_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_week_seq] + CometBroadcastExchange [d_date_sk,d_date] #9 + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [p_promo_sk] #10 + CometFilter [p_promo_sk] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_item_sk,cr_order_number] #11 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..11dbe2e941 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/explain.txt @@ -0,0 +1,369 @@ +== Physical Plan == +* ColumnarToRow (64) ++- CometTakeOrderedAndProject (63) + +- CometHashAggregate (62) + +- CometExchange (61) + +- CometHashAggregate (60) + +- CometProject (59) + +- CometSortMergeJoin (58) + :- CometSort (52) + : +- CometExchange (51) + : +- CometProject (50) + : +- CometBroadcastHashJoin (49) + : :- CometProject (45) + : : +- CometBroadcastHashJoin (44) + : : :- CometProject (40) + : : : +- CometBroadcastHashJoin (39) + : : : :- CometProject (35) + : : : : +- CometBroadcastHashJoin (34) + : : : : :- CometProject (29) + : : : : : +- CometBroadcastHashJoin (28) + : : : : : :- CometProject (23) + : : : : : : +- CometBroadcastHashJoin (22) + : : : : : : :- CometProject (17) + : : : : : : : +- CometBroadcastHashJoin (16) + : : : : : : : :- CometProject (12) + : : : : : : : : +- CometBroadcastHashJoin (11) + : : : : : : : : :- CometProject (7) + : : : : : : : : : +- CometBroadcastHashJoin (6) + : : : : : : : : : :- CometFilter (2) + : : : : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : : : +- CometBroadcastExchange (5) + : : : : : : : : : +- CometFilter (4) + : : : : : : : : : +- CometScan parquet spark_catalog.default.inventory (3) + : : : : : : : : +- CometBroadcastExchange (10) + : : : : : : : : +- CometFilter (9) + : : : : : : : : +- CometScan parquet spark_catalog.default.warehouse (8) + : : : : : : : +- CometBroadcastExchange (15) + : : : : : : : +- CometFilter (14) + : : : : : : : +- CometScan parquet spark_catalog.default.item (13) + : : : : : : +- CometBroadcastExchange (21) + : : : : : : +- CometProject (20) + : : : : : : +- CometFilter (19) + : : : : : : +- CometScan parquet spark_catalog.default.customer_demographics (18) + : : : : : +- CometBroadcastExchange (27) + : : : : : +- CometProject (26) + : : : : : +- CometFilter (25) + : : : : : +- CometScan parquet spark_catalog.default.household_demographics (24) + : : : : +- CometBroadcastExchange (33) + : : : : +- CometProject (32) + : : : : +- CometFilter (31) + : : : : +- CometScan parquet spark_catalog.default.date_dim (30) + : : : +- CometBroadcastExchange (38) + : : : +- CometFilter (37) + : : : +- CometScan parquet spark_catalog.default.date_dim (36) + : : +- CometBroadcastExchange (43) + : : +- CometFilter (42) + : : +- CometScan parquet spark_catalog.default.date_dim (41) + : +- CometBroadcastExchange (48) + : +- CometFilter (47) + : +- CometScan parquet spark_catalog.default.promotion (46) + +- CometSort (57) + +- CometExchange (56) + +- CometProject (55) + +- CometFilter (54) + +- CometScan parquet spark_catalog.default.catalog_returns (53) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) CometFilter +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(3) CometScan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#12)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(4) CometFilter +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(5) CometBroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Right output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_item_sk#4], [inv_item_sk#9], Inner, (inv_quantity_on_hand#11 < cs_quantity#7), BuildRight + +(7) CometProject +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] + +(8) CometScan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(10) CometBroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [w_warehouse_sk#13, w_warehouse_name#14] + +(11) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Right output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [inv_warehouse_sk#10], [w_warehouse_sk#13], Inner, BuildRight + +(12) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] + +(13) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_desc#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [i_item_sk#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#15) + +(15) CometBroadcastExchange +Input [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [i_item_sk#15, i_item_desc#16] + +(16) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Right output [2]: [i_item_sk#15, i_item_desc#16] +Arguments: [cs_item_sk#4], [i_item_sk#15], Inner, BuildRight + +(17) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] +Arguments: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(18) CometScan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = M)) AND isnotnull(cd_demo_sk#17)) + +(20) CometProject +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Arguments: [cd_demo_sk#17], [cd_demo_sk#17] + +(21) CometBroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: [cd_demo_sk#17] + +(22) CometBroadcastHashJoin +Left output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [cd_demo_sk#17] +Arguments: [cs_bill_cdemo_sk#2], [cd_demo_sk#17], Inner, BuildRight + +(23) CometProject +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] +Arguments: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(24) CometScan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000 ), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(25) CometFilter +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = 1001-5000 )) AND isnotnull(hd_demo_sk#19)) + +(26) CometProject +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Arguments: [hd_demo_sk#19], [hd_demo_sk#19] + +(27) CometBroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: [hd_demo_sk#19] + +(28) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [1]: [hd_demo_sk#19] +Arguments: [cs_bill_hdemo_sk#3], [hd_demo_sk#19], Inner, BuildRight + +(29) CometProject +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] + +(30) CometScan parquet spark_catalog.default.date_dim +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(31) CometFilter +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 2001)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(32) CometProject +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23], [d_date_sk#21, d_date#22, d_week_seq#23] + +(33) CometBroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [d_date_sk#21, d_date#22, d_week_seq#23] + +(34) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Right output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_sold_date_sk#8], [d_date_sk#21], Inner, BuildRight + +(35) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(36) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#25, d_week_seq#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(37) CometFilter +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(38) CometBroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_date_sk#25, d_week_seq#26] + +(39) CometBroadcastHashJoin +Left output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#25, d_week_seq#26] +Arguments: [d_week_seq#23, inv_date_sk#12], [d_week_seq#26, d_date_sk#25], Inner, BuildRight + +(40) CometProject +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] +Arguments: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23], [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] + +(41) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_date#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(42) CometFilter +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(43) CometBroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: [d_date_sk#27, d_date#28] + +(44) CometBroadcastHashJoin +Left output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Right output [2]: [d_date_sk#27, d_date#28] +Arguments: [cs_ship_date_sk#1], [d_date_sk#27], Inner, (d_date#28 > date_add(d_date#22, 5)), BuildRight + +(45) CometProject +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] +Arguments: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(46) CometScan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(47) CometFilter +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(48) CometBroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: [p_promo_sk#29] + +(49) CometBroadcastHashJoin +Left output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [1]: [p_promo_sk#29] +Arguments: [cs_promo_sk#5], [p_promo_sk#29], LeftOuter, BuildRight + +(50) CometProject +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(51) CometExchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(52) CometSort +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST] + +(53) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(54) CometFilter +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(55) CometProject +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30, cr_order_number#31] + +(56) CometExchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(57) CometSort +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30, cr_order_number#31], [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST] + +(58) CometSortMergeJoin +Left output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Right output [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cs_item_sk#4, cs_order_number#6], [cr_item_sk#30, cr_order_number#31], LeftOuter + +(59) CometProject +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] +Arguments: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23], [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] + +(60) CometHashAggregate +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] + +(61) CometExchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(62) CometHashAggregate +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#33] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] + +(63) CometTakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[total_cnt#36 DESC NULLS LAST,i_item_desc#16 ASC NULLS FIRST,w_warehouse_name#14 ASC NULLS FIRST,d_week_seq#23 ASC NULLS FIRST], output=[i_item_desc#16,w_warehouse_name#14,d_week_seq#23,no_promo#34,promo#35,total_cnt#36]), [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36], 100, [total_cnt#36 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + +(64) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#34, promo#35, total_cnt#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..59c8e37099 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q72.native_iceberg_compat/simplified.txt @@ -0,0 +1,66 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt] + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo,total_cnt,count,count(1)] + CometExchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + CometHashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] + CometProject [w_warehouse_name,i_item_desc,d_week_seq] + CometSortMergeJoin [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,cr_item_sk,cr_order_number] + CometSort [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometExchange [cs_item_sk,cs_order_number] #2 + CometProject [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq,p_promo_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_date] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq,d_date_sk,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + CometBroadcastHashJoin [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,d_date_sk,d_date,d_week_seq] + CometProject [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,hd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc,cd_demo_sk] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_sk,i_item_desc] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk,w_warehouse_sk,w_warehouse_name] + CometProject [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + CometBroadcastHashJoin [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometFilter [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + CometBroadcastExchange [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] #3 + CometFilter [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometScan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + CometBroadcastExchange [w_warehouse_sk,w_warehouse_name] #4 + CometFilter [w_warehouse_sk,w_warehouse_name] + CometScan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + CometBroadcastExchange [i_item_sk,i_item_desc] #5 + CometFilter [i_item_sk,i_item_desc] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + CometBroadcastExchange [cd_demo_sk] #6 + CometProject [cd_demo_sk] + CometFilter [cd_demo_sk,cd_marital_status] + CometScan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + CometBroadcastExchange [hd_demo_sk] #7 + CometProject [hd_demo_sk] + CometFilter [hd_demo_sk,hd_buy_potential] + CometScan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] + CometBroadcastExchange [d_date_sk,d_date,d_week_seq] #8 + CometProject [d_date_sk,d_date,d_week_seq] + CometFilter [d_date_sk,d_date,d_week_seq,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + CometBroadcastExchange [d_date_sk,d_week_seq] #9 + CometFilter [d_date_sk,d_week_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + CometBroadcastExchange [d_date_sk,d_date] #10 + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [p_promo_sk] #11 + CometFilter [p_promo_sk] + CometScan parquet spark_catalog.default.promotion [p_promo_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_item_sk,cr_order_number] #12 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/explain.txt new file mode 100644 index 0000000000..2b7ef29c29 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/explain.txt @@ -0,0 +1,232 @@ +== Physical Plan == +* ColumnarToRow (44) ++- CometTakeOrderedAndProject (43) + +- CometProject (42) + +- CometBroadcastHashJoin (41) + :- CometProject (37) + : +- CometBroadcastHashJoin (36) + : :- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`customer` (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (22) + : +- CometBroadcastExchange (35) + : +- CometFilter (34) + : +- CometHashAggregate (33) + : +- ReusedExchange (32) + +- CometBroadcastExchange (40) + +- CometHashAggregate (39) + +- ReusedExchange (38) + + +(1) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Arguments: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(2) CometFilter +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(4) CometFilter +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(5) CometBroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Right output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#1], [ss_customer_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(9) CometFilter +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(11) CometBroadcastHashJoin +Left output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_year#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] + +(13) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] + +(14) CometExchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] + +(16) CometFilter +Input [2]: [customer_id#11, year_total#12] +Condition : (isnotnull(year_total#12) AND (year_total#12 > 0.00)) + +(17) CometNativeScan: `spark_catalog`.`default`.`customer` +Output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Arguments: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] + +(18) CometFilter +Input [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Condition : (isnotnull(c_customer_sk#13) AND isnotnull(c_customer_id#14)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] + +(20) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Right output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_sk#13], [ss_customer_sk#17], Inner, BuildRight + +(21) CometProject +Input [7]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16, ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] + +(22) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#20, d_year#21] +Arguments: [d_date_sk#20, d_year#21] + +(23) CometFilter +Input [2]: [d_date_sk#20, d_year#21] +Condition : (((isnotnull(d_year#21) AND (d_year#21 = 2002)) AND d_year#21 IN (2001,2002)) AND isnotnull(d_date_sk#20)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#20, d_year#21] +Arguments: [d_date_sk#20, d_year#21] + +(25) CometBroadcastHashJoin +Left output [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] +Right output [2]: [d_date_sk#20, d_year#21] +Arguments: [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + +(26) CometProject +Input [7]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19, d_date_sk#20, d_year#21] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] + +(27) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#18))] + +(28) CometExchange +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Arguments: hashpartitioning(c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [sum(UnscaledValue(ss_net_paid#18))] + +(30) CometBroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#11, year_total#12] +Right output [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#11], [customer_id#23], Inner, BuildRight + +(32) ReusedExchange [Reuses operator id: 14] +Output [5]: [c_customer_id#27, c_first_name#28, c_last_name#29, d_year#30, sum#31] + +(33) CometHashAggregate +Input [5]: [c_customer_id#27, c_first_name#28, c_last_name#29, d_year#30, sum#31] +Keys [4]: [c_customer_id#27, c_first_name#28, c_last_name#29, d_year#30] +Functions [1]: [sum(UnscaledValue(ws_net_paid#32))] + +(34) CometFilter +Input [2]: [customer_id#33, year_total#34] +Condition : (isnotnull(year_total#34) AND (year_total#34 > 0.00)) + +(35) CometBroadcastExchange +Input [2]: [customer_id#33, year_total#34] +Arguments: [customer_id#33, year_total#34] + +(36) CometBroadcastHashJoin +Left output [6]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Right output [2]: [customer_id#33, year_total#34] +Arguments: [customer_id#11], [customer_id#33], Inner, BuildRight + +(37) CometProject +Input [8]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#33, year_total#34] +Arguments: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34], [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34] + +(38) ReusedExchange [Reuses operator id: 28] +Output [5]: [c_customer_id#35, c_first_name#36, c_last_name#37, d_year#38, sum#39] + +(39) CometHashAggregate +Input [5]: [c_customer_id#35, c_first_name#36, c_last_name#37, d_year#38, sum#39] +Keys [4]: [c_customer_id#35, c_first_name#36, c_last_name#37, d_year#38] +Functions [1]: [sum(UnscaledValue(ws_net_paid#40))] + +(40) CometBroadcastExchange +Input [2]: [customer_id#41, year_total#42] +Arguments: [customer_id#41, year_total#42] + +(41) CometBroadcastHashJoin +Left output [7]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34] +Right output [2]: [customer_id#41, year_total#42] +Arguments: [customer_id#11], [customer_id#41], Inner, (CASE WHEN (year_total#34 > 0.00) THEN (year_total#42 / year_total#34) END > CASE WHEN (year_total#12 > 0.00) THEN (year_total#26 / year_total#12) END), BuildRight + +(42) CometProject +Input [9]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#34, customer_id#41, year_total#42] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(43) CometTakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_first_name#24 ASC NULLS FIRST,customer_id#23 ASC NULLS FIRST,customer_last_name#25 ASC NULLS FIRST], output=[customer_id#23,customer_first_name#24,customer_last_name#25]), [customer_id#23, customer_first_name#24, customer_last_name#25], 100, [customer_first_name#24 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_last_name#25 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(44) ColumnarToRow [codegen id : 1] +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ea904874f9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_datafusion/simplified.txt @@ -0,0 +1,46 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + CometProject [customer_id,customer_first_name,customer_last_name] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometNativeScan: `spark_catalog`.`default`.`customer` [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [c_customer_id,c_first_name,c_last_name,d_year,sum] #1 + CometBroadcastExchange [customer_id,year_total] #8 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [c_customer_id,c_first_name,c_last_name,d_year,sum] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b7b1658ff3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/explain.txt @@ -0,0 +1,363 @@ +== Physical Plan == +* ColumnarToRow (64) ++- CometTakeOrderedAndProject (63) + +- CometProject (62) + +- CometBroadcastHashJoin (61) + :- CometProject (48) + : +- CometBroadcastHashJoin (47) + : :- CometBroadcastHashJoin (31) + : : :- CometFilter (16) + : : : +- CometHashAggregate (15) + : : : +- CometExchange (14) + : : : +- CometHashAggregate (13) + : : : +- CometProject (12) + : : : +- CometBroadcastHashJoin (11) + : : : :- CometProject (7) + : : : : +- CometBroadcastHashJoin (6) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.customer (1) + : : : : +- CometBroadcastExchange (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.store_sales (3) + : : : +- CometBroadcastExchange (10) + : : : +- CometFilter (9) + : : : +- CometScan parquet spark_catalog.default.date_dim (8) + : : +- CometBroadcastExchange (30) + : : +- CometHashAggregate (29) + : : +- CometExchange (28) + : : +- CometHashAggregate (27) + : : +- CometProject (26) + : : +- CometBroadcastHashJoin (25) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometScan parquet spark_catalog.default.customer (17) + : : : +- ReusedExchange (19) + : : +- CometBroadcastExchange (24) + : : +- CometFilter (23) + : : +- CometScan parquet spark_catalog.default.date_dim (22) + : +- CometBroadcastExchange (46) + : +- CometFilter (45) + : +- CometHashAggregate (44) + : +- CometExchange (43) + : +- CometHashAggregate (42) + : +- CometProject (41) + : +- CometBroadcastHashJoin (40) + : :- CometProject (38) + : : +- CometBroadcastHashJoin (37) + : : :- CometFilter (33) + : : : +- CometScan parquet spark_catalog.default.customer (32) + : : +- CometBroadcastExchange (36) + : : +- CometFilter (35) + : : +- CometScan parquet spark_catalog.default.web_sales (34) + : +- ReusedExchange (39) + +- CometBroadcastExchange (60) + +- CometHashAggregate (59) + +- CometExchange (58) + +- CometHashAggregate (57) + +- CometProject (56) + +- CometBroadcastHashJoin (55) + :- CometProject (53) + : +- CometBroadcastHashJoin (52) + : :- CometFilter (50) + : : +- CometScan parquet spark_catalog.default.customer (49) + : +- ReusedExchange (51) + +- ReusedExchange (54) + + +(1) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(3) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(4) CometFilter +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(5) CometBroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Right output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_sk#1], [ss_customer_sk#5], Inner, BuildRight + +(7) CometProject +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_year#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(10) CometBroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: [d_date_sk#8, d_year#9] + +(11) CometBroadcastHashJoin +Left output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#8, d_year#9] +Arguments: [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + +(12) CometProject +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] +Arguments: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9], [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] + +(13) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] + +(14) CometExchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometHashAggregate +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] + +(16) CometFilter +Input [2]: [customer_id#11, year_total#12] +Condition : (isnotnull(year_total#12) AND (year_total#12 > 0.00)) + +(17) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(18) CometFilter +Input [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Condition : (isnotnull(c_customer_sk#13) AND isnotnull(c_customer_id#14)) + +(19) ReusedExchange [Reuses operator id: 5] +Output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] + +(20) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16] +Right output [3]: [ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_sk#13], [ss_customer_sk#17], Inner, BuildRight + +(21) CometProject +Input [7]: [c_customer_sk#13, c_customer_id#14, c_first_name#15, c_last_name#16, ss_customer_sk#17, ss_net_paid#18, ss_sold_date_sk#19] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] + +(22) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#20, d_year#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) CometFilter +Input [2]: [d_date_sk#20, d_year#21] +Condition : (((isnotnull(d_year#21) AND (d_year#21 = 2002)) AND d_year#21 IN (2001,2002)) AND isnotnull(d_date_sk#20)) + +(24) CometBroadcastExchange +Input [2]: [d_date_sk#20, d_year#21] +Arguments: [d_date_sk#20, d_year#21] + +(25) CometBroadcastHashJoin +Left output [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19] +Right output [2]: [d_date_sk#20, d_year#21] +Arguments: [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + +(26) CometProject +Input [7]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, ss_sold_date_sk#19, d_date_sk#20, d_year#21] +Arguments: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21], [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] + +(27) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, ss_net_paid#18, d_year#21] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#18))] + +(28) CometExchange +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Arguments: hashpartitioning(c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [5]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21, sum#22] +Keys [4]: [c_customer_id#14, c_first_name#15, c_last_name#16, d_year#21] +Functions [1]: [sum(UnscaledValue(ss_net_paid#18))] + +(30) CometBroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] + +(31) CometBroadcastHashJoin +Left output [2]: [customer_id#11, year_total#12] +Right output [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: [customer_id#11], [customer_id#23], Inner, BuildRight + +(32) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(33) CometFilter +Input [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_customer_id#28)) + +(34) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#33)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(35) CometFilter +Input [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Condition : isnotnull(ws_bill_customer_sk#31) + +(36) CometBroadcastExchange +Input [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Arguments: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] + +(37) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Right output [3]: [ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Arguments: [c_customer_sk#27], [ws_bill_customer_sk#31], Inner, BuildRight + +(38) CometProject +Input [7]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30, ws_bill_customer_sk#31, ws_net_paid#32, ws_sold_date_sk#33] +Arguments: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33], [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33] + +(39) ReusedExchange [Reuses operator id: 10] +Output [2]: [d_date_sk#34, d_year#35] + +(40) CometBroadcastHashJoin +Left output [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33] +Right output [2]: [d_date_sk#34, d_year#35] +Arguments: [ws_sold_date_sk#33], [d_date_sk#34], Inner, BuildRight + +(41) CometProject +Input [7]: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, ws_sold_date_sk#33, d_date_sk#34, d_year#35] +Arguments: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, d_year#35], [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, d_year#35] + +(42) CometHashAggregate +Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, ws_net_paid#32, d_year#35] +Keys [4]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#32))] + +(43) CometExchange +Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35, sum#36] +Arguments: hashpartitioning(c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(44) CometHashAggregate +Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35, sum#36] +Keys [4]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#35] +Functions [1]: [sum(UnscaledValue(ws_net_paid#32))] + +(45) CometFilter +Input [2]: [customer_id#37, year_total#38] +Condition : (isnotnull(year_total#38) AND (year_total#38 > 0.00)) + +(46) CometBroadcastExchange +Input [2]: [customer_id#37, year_total#38] +Arguments: [customer_id#37, year_total#38] + +(47) CometBroadcastHashJoin +Left output [6]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Right output [2]: [customer_id#37, year_total#38] +Arguments: [customer_id#11], [customer_id#37], Inner, BuildRight + +(48) CometProject +Input [8]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#37, year_total#38] +Arguments: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38], [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38] + +(49) CometScan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(50) CometFilter +Input [4]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42] +Condition : (isnotnull(c_customer_sk#39) AND isnotnull(c_customer_id#40)) + +(51) ReusedExchange [Reuses operator id: 36] +Output [3]: [ws_bill_customer_sk#43, ws_net_paid#44, ws_sold_date_sk#45] + +(52) CometBroadcastHashJoin +Left output [4]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42] +Right output [3]: [ws_bill_customer_sk#43, ws_net_paid#44, ws_sold_date_sk#45] +Arguments: [c_customer_sk#39], [ws_bill_customer_sk#43], Inner, BuildRight + +(53) CometProject +Input [7]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, ws_bill_customer_sk#43, ws_net_paid#44, ws_sold_date_sk#45] +Arguments: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45], [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45] + +(54) ReusedExchange [Reuses operator id: 24] +Output [2]: [d_date_sk#46, d_year#47] + +(55) CometBroadcastHashJoin +Left output [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45] +Right output [2]: [d_date_sk#46, d_year#47] +Arguments: [ws_sold_date_sk#45], [d_date_sk#46], Inner, BuildRight + +(56) CometProject +Input [7]: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, ws_sold_date_sk#45, d_date_sk#46, d_year#47] +Arguments: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, d_year#47], [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, d_year#47] + +(57) CometHashAggregate +Input [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, ws_net_paid#44, d_year#47] +Keys [4]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#44))] + +(58) CometExchange +Input [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47, sum#48] +Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(59) CometHashAggregate +Input [5]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47, sum#48] +Keys [4]: [c_customer_id#40, c_first_name#41, c_last_name#42, d_year#47] +Functions [1]: [sum(UnscaledValue(ws_net_paid#44))] + +(60) CometBroadcastExchange +Input [2]: [customer_id#49, year_total#50] +Arguments: [customer_id#49, year_total#50] + +(61) CometBroadcastHashJoin +Left output [7]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38] +Right output [2]: [customer_id#49, year_total#50] +Arguments: [customer_id#11], [customer_id#49], Inner, (CASE WHEN (year_total#38 > 0.00) THEN (year_total#50 / year_total#38) END > CASE WHEN (year_total#12 > 0.00) THEN (year_total#26 / year_total#12) END), BuildRight + +(62) CometProject +Input [9]: [customer_id#11, year_total#12, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#38, customer_id#49, year_total#50] +Arguments: [customer_id#23, customer_first_name#24, customer_last_name#25], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(63) CometTakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[customer_first_name#24 ASC NULLS FIRST,customer_id#23 ASC NULLS FIRST,customer_last_name#25 ASC NULLS FIRST], output=[customer_id#23,customer_first_name#24,customer_last_name#25]), [customer_id#23, customer_first_name#24, customer_last_name#25], 100, [customer_first_name#24 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_last_name#25 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + +(64) ColumnarToRow [codegen id : 1] +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1d3e33a2fb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q74.native_iceberg_compat/simplified.txt @@ -0,0 +1,66 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + CometProject [customer_id,customer_first_name,customer_last_name] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total,customer_id,year_total] + CometProject [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,customer_id,year_total] + CometBroadcastHashJoin [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total] + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometFilter [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #3 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,customer_first_name,customer_last_name,year_total] #4 + CometHashAggregate [customer_id,customer_first_name,customer_last_name,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ss_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ss_customer_sk,ss_net_paid,ss_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometBroadcastExchange [customer_id,year_total] #7 + CometFilter [customer_id,year_total] + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ws_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometBroadcastExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + CometFilter [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + ReusedExchange [d_date_sk,d_year] #3 + CometBroadcastExchange [customer_id,year_total] #10 + CometHashAggregate [customer_id,year_total,c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + CometHashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,ws_net_paid] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + CometBroadcastHashJoin [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk,d_date_sk,d_year] + CometProject [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + CometBroadcastHashJoin [c_customer_sk,c_customer_id,c_first_name,c_last_name,ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + CometFilter [c_customer_sk,c_customer_id,c_first_name,c_last_name] + CometScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/explain.txt new file mode 100644 index 0000000000..1c6fedd6e0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/explain.txt @@ -0,0 +1,443 @@ +== Physical Plan == +* ColumnarToRow (86) ++- CometTakeOrderedAndProject (85) + +- CometProject (84) + +- CometSortMergeJoin (83) + :- CometSort (44) + : +- CometExchange (43) + : +- CometFilter (42) + : +- CometHashAggregate (41) + : +- CometExchange (40) + : +- CometHashAggregate (39) + : +- CometHashAggregate (38) + : +- CometExchange (37) + : +- CometHashAggregate (36) + : +- CometUnion (35) + : :- CometProject (22) + : : +- CometSortMergeJoin (21) + : : :- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (9) + : : +- CometSort (20) + : : +- CometExchange (19) + : : +- CometProject (18) + : : +- CometFilter (17) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (16) + : :- CometProject (28) + : : +- CometSortMergeJoin (27) + : : :- CometSort (24) + : : : +- ReusedExchange (23) + : : +- CometSort (26) + : : +- ReusedExchange (25) + : +- CometProject (34) + : +- CometSortMergeJoin (33) + : :- CometSort (30) + : : +- ReusedExchange (29) + : +- CometSort (32) + : +- ReusedExchange (31) + +- CometSort (82) + +- CometExchange (81) + +- CometFilter (80) + +- CometHashAggregate (79) + +- CometExchange (78) + +- CometHashAggregate (77) + +- CometHashAggregate (76) + +- CometExchange (75) + +- CometHashAggregate (74) + +- CometUnion (73) + :- CometProject (60) + : +- CometSortMergeJoin (59) + : :- CometSort (56) + : : +- CometExchange (55) + : : +- CometProject (54) + : : +- CometBroadcastHashJoin (53) + : : :- CometProject (49) + : : : +- CometBroadcastHashJoin (48) + : : : :- CometFilter (46) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (45) + : : : +- ReusedExchange (47) + : : +- CometBroadcastExchange (52) + : : +- CometFilter (51) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (50) + : +- CometSort (58) + : +- ReusedExchange (57) + :- CometProject (66) + : +- CometSortMergeJoin (65) + : :- CometSort (62) + : : +- ReusedExchange (61) + : +- CometSort (64) + : +- ReusedExchange (63) + +- CometProject (72) + +- CometSortMergeJoin (71) + :- CometSort (68) + : +- ReusedExchange (67) + +- CometSort (70) + +- ReusedExchange (69) + + +(1) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(2) CometFilter +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(4) CometFilter +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(5) CometProject +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(6) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(7) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Right output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(8) CometProject +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(9) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(10) CometFilter +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(12) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right output [2]: [d_date_sk#12, d_year#13] +Arguments: [cs_sold_date_sk#5], [d_date_sk#12], Inner, BuildRight + +(13) CometProject +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] + +(14) CometExchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometSort +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST] + +(16) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(17) CometFilter +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(18) CometProject +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(19) CometExchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Right output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cs_order_number#2, cs_item_sk#1], [cr_order_number#15, cr_item_sk#14], LeftOuter + +(22) CometProject +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20], [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] + +(23) ReusedExchange [Reuses operator id: 14] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] + +(24) CometSort +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29], [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST] + +(25) ReusedExchange [Reuses operator id: 19] +Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] + +(26) CometSort +Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33], [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST] + +(27) CometSortMergeJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29] +Right output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [ss_ticket_number#22, ss_item_sk#21], [sr_ticket_number#31, sr_item_sk#30], LeftOuter + +(28) CometProject +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, d_year#29, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] +Arguments: [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, sales_cnt#34, sales_amt#35], [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, (ss_quantity#23 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#34, (ss_ext_sales_price#24 - coalesce(sr_return_amt#33, 0.00)) AS sales_amt#35] + +(29) ReusedExchange [Reuses operator id: 14] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] + +(30) CometSort +Input [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] +Arguments: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44], [ws_order_number#37 ASC NULLS FIRST, ws_item_sk#36 ASC NULLS FIRST] + +(31) ReusedExchange [Reuses operator id: 19] +Output [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] + +(32) CometSort +Input [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48], [wr_order_number#46 ASC NULLS FIRST, wr_item_sk#45 ASC NULLS FIRST] + +(33) CometSortMergeJoin +Left output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44] +Right output [4]: [wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [ws_order_number#37, ws_item_sk#36], [wr_order_number#46, wr_item_sk#45], LeftOuter + +(34) CometProject +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, d_year#44, wr_item_sk#45, wr_order_number#46, wr_return_quantity#47, wr_return_amt#48] +Arguments: [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, sales_cnt#49, sales_amt#50], [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, (ws_quantity#38 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#49, (ws_ext_sales_price#39 - coalesce(wr_return_amt#48, 0.00)) AS sales_amt#50] + +(35) CometUnion +Child 0 Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#29, i_brand_id#25, i_class_id#26, i_category_id#27, i_manufact_id#28, sales_cnt#34, sales_amt#35] +Child 2 Input [7]: [d_year#44, i_brand_id#40, i_class_id#41, i_category_id#42, i_manufact_id#43, sales_cnt#49, sales_amt#50] + +(36) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(37) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(38) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(39) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(40) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(41) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(42) CometFilter +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Condition : isnotnull(sales_cnt#53) + +(43) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(44) CometSort +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54], [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST] + +(45) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [5]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] + +(46) CometFilter +Input [5]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] +Condition : isnotnull(cs_item_sk#55) + +(47) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] + +(48) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59] +Right output [5]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Arguments: [cs_item_sk#55], [i_item_sk#60], Inner, BuildRight + +(49) CometProject +Input [10]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64], [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] + +(50) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#65, d_year#66] +Arguments: [d_date_sk#65, d_year#66] + +(51) CometFilter +Input [2]: [d_date_sk#65, d_year#66] +Condition : ((isnotnull(d_year#66) AND (d_year#66 = 2001)) AND isnotnull(d_date_sk#65)) + +(52) CometBroadcastExchange +Input [2]: [d_date_sk#65, d_year#66] +Arguments: [d_date_sk#65, d_year#66] + +(53) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Right output [2]: [d_date_sk#65, d_year#66] +Arguments: [cs_sold_date_sk#59], [d_date_sk#65], Inner, BuildRight + +(54) CometProject +Input [11]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, cs_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_date_sk#65, d_year#66] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66], [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] + +(55) CometExchange +Input [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] +Arguments: hashpartitioning(cs_order_number#56, cs_item_sk#55, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(56) CometSort +Input [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] +Arguments: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66], [cs_order_number#56 ASC NULLS FIRST, cs_item_sk#55 ASC NULLS FIRST] + +(57) ReusedExchange [Reuses operator id: 19] +Output [4]: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] + +(58) CometSort +Input [4]: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] +Arguments: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70], [cr_order_number#68 ASC NULLS FIRST, cr_item_sk#67 ASC NULLS FIRST] + +(59) CometSortMergeJoin +Left output [9]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66] +Right output [4]: [cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] +Arguments: [cs_order_number#56, cs_item_sk#55], [cr_order_number#68, cr_item_sk#67], LeftOuter + +(60) CometProject +Input [13]: [cs_item_sk#55, cs_order_number#56, cs_quantity#57, cs_ext_sales_price#58, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, d_year#66, cr_item_sk#67, cr_order_number#68, cr_return_quantity#69, cr_return_amount#70] +Arguments: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20], [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, (cs_quantity#57 - coalesce(cr_return_quantity#69, 0)) AS sales_cnt#19, (cs_ext_sales_price#58 - coalesce(cr_return_amount#70, 0.00)) AS sales_amt#20] + +(61) ReusedExchange [Reuses operator id: 55] +Output [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79] + +(62) CometSort +Input [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79] +Arguments: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79], [ss_ticket_number#72 ASC NULLS FIRST, ss_item_sk#71 ASC NULLS FIRST] + +(63) ReusedExchange [Reuses operator id: 19] +Output [4]: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] + +(64) CometSort +Input [4]: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] +Arguments: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83], [sr_ticket_number#81 ASC NULLS FIRST, sr_item_sk#80 ASC NULLS FIRST] + +(65) CometSortMergeJoin +Left output [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79] +Right output [4]: [sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] +Arguments: [ss_ticket_number#72, ss_item_sk#71], [sr_ticket_number#81, sr_item_sk#80], LeftOuter + +(66) CometProject +Input [13]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_ext_sales_price#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#79, sr_item_sk#80, sr_ticket_number#81, sr_return_quantity#82, sr_return_amt#83] +Arguments: [d_year#79, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#34, sales_amt#35], [d_year#79, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, (ss_quantity#73 - coalesce(sr_return_quantity#82, 0)) AS sales_cnt#34, (ss_ext_sales_price#74 - coalesce(sr_return_amt#83, 0.00)) AS sales_amt#35] + +(67) ReusedExchange [Reuses operator id: 55] +Output [9]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92] + +(68) CometSort +Input [9]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92] +Arguments: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92], [ws_order_number#85 ASC NULLS FIRST, ws_item_sk#84 ASC NULLS FIRST] + +(69) ReusedExchange [Reuses operator id: 19] +Output [4]: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] + +(70) CometSort +Input [4]: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] +Arguments: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96], [wr_order_number#94 ASC NULLS FIRST, wr_item_sk#93 ASC NULLS FIRST] + +(71) CometSortMergeJoin +Left output [9]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92] +Right output [4]: [wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] +Arguments: [ws_order_number#85, ws_item_sk#84], [wr_order_number#94, wr_item_sk#93], LeftOuter + +(72) CometProject +Input [13]: [ws_item_sk#84, ws_order_number#85, ws_quantity#86, ws_ext_sales_price#87, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, d_year#92, wr_item_sk#93, wr_order_number#94, wr_return_quantity#95, wr_return_amt#96] +Arguments: [d_year#92, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, sales_cnt#49, sales_amt#50], [d_year#92, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, (ws_quantity#86 - coalesce(wr_return_quantity#95, 0)) AS sales_cnt#49, (ws_ext_sales_price#87 - coalesce(wr_return_amt#96, 0.00)) AS sales_amt#50] + +(73) CometUnion +Child 0 Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#79, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#34, sales_amt#35] +Child 2 Input [7]: [d_year#92, i_brand_id#88, i_class_id#89, i_category_id#90, i_manufact_id#91, sales_cnt#49, sales_amt#50] + +(74) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Functions: [] + +(75) CometExchange +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(76) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Functions: [] + +(77) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(78) CometExchange +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sum#51, sum#97] +Arguments: hashpartitioning(d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(79) CometHashAggregate +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sum#51, sum#97] +Keys [5]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(80) CometFilter +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Condition : isnotnull(sales_cnt#98) + +(81) CometExchange +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(82) CometSort +Input [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99], [i_brand_id#61 ASC NULLS FIRST, i_class_id#62 ASC NULLS FIRST, i_category_id#63 ASC NULLS FIRST, i_manufact_id#64 ASC NULLS FIRST] + +(83) CometSortMergeJoin +Left output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54] +Right output [7]: [d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64], Inner, ((cast(sales_cnt#53 as decimal(17,2)) / cast(sales_cnt#98 as decimal(17,2))) < 0.90000000000000000000) + +(84) CometProject +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#53, sales_amt#54, d_year#66, i_brand_id#61, i_class_id#62, i_category_id#63, i_manufact_id#64, sales_cnt#98, sales_amt#99] +Arguments: [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105], [d_year#66 AS prev_year#100, d_year#13 AS year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#98 AS prev_yr_cnt#102, sales_cnt#53 AS curr_yr_cnt#103, (sales_cnt#53 - sales_cnt#98) AS sales_cnt_diff#104, (sales_amt#54 - sales_amt#99) AS sales_amt_diff#105] + +(85) CometTakeOrderedAndProject +Input [10]: [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#104 ASC NULLS FIRST,sales_amt_diff#105 ASC NULLS FIRST], output=[prev_year#100,year#101,i_brand_id#7,i_class_id#8,i_category_id#9,i_manufact_id#11,prev_yr_cnt#102,curr_yr_cnt#103,sales_cnt_diff#104,sales_amt_diff#105]), [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105], 100, [sales_cnt_diff#104 ASC NULLS FIRST, sales_amt_diff#105 ASC NULLS FIRST], [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105] + +(86) ColumnarToRow [codegen id : 1] +Input [10]: [prev_year#100, year#101, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#102, curr_yr_cnt#103, sales_cnt_diff#104, sales_amt_diff#105] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6503d71685 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_datafusion/simplified.txt @@ -0,0 +1,88 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometProject [d_year,d_year,sales_cnt,sales_cnt,sales_amt,sales_amt] [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometSortMergeJoin [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #4 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometProject [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometExchange [cr_order_number,cr_item_sk] #7 + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #4 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #4 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #8 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #11 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometBroadcastExchange [d_date_sk,d_year] #12 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #11 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + ReusedExchange [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] #11 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..5e2a619ed5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/explain.txt @@ -0,0 +1,681 @@ +== Physical Plan == +* ColumnarToRow (124) ++- CometTakeOrderedAndProject (123) + +- CometProject (122) + +- CometSortMergeJoin (121) + :- CometSort (66) + : +- CometExchange (65) + : +- CometFilter (64) + : +- CometHashAggregate (63) + : +- CometExchange (62) + : +- CometHashAggregate (61) + : +- CometHashAggregate (60) + : +- CometExchange (59) + : +- CometHashAggregate (58) + : +- CometUnion (57) + : :- CometProject (22) + : : +- CometSortMergeJoin (21) + : : :- CometSort (15) + : : : +- CometExchange (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.item (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.date_dim (9) + : : +- CometSort (20) + : : +- CometExchange (19) + : : +- CometProject (18) + : : +- CometFilter (17) + : : +- CometScan parquet spark_catalog.default.catalog_returns (16) + : :- CometProject (39) + : : +- CometSortMergeJoin (38) + : : :- CometSort (32) + : : : +- CometExchange (31) + : : : +- CometProject (30) + : : : +- CometBroadcastHashJoin (29) + : : : :- CometProject (27) + : : : : +- CometBroadcastHashJoin (26) + : : : : :- CometFilter (24) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (23) + : : : : +- ReusedExchange (25) + : : : +- ReusedExchange (28) + : : +- CometSort (37) + : : +- CometExchange (36) + : : +- CometProject (35) + : : +- CometFilter (34) + : : +- CometScan parquet spark_catalog.default.store_returns (33) + : +- CometProject (56) + : +- CometSortMergeJoin (55) + : :- CometSort (49) + : : +- CometExchange (48) + : : +- CometProject (47) + : : +- CometBroadcastHashJoin (46) + : : :- CometProject (44) + : : : +- CometBroadcastHashJoin (43) + : : : :- CometFilter (41) + : : : : +- CometScan parquet spark_catalog.default.web_sales (40) + : : : +- ReusedExchange (42) + : : +- ReusedExchange (45) + : +- CometSort (54) + : +- CometExchange (53) + : +- CometProject (52) + : +- CometFilter (51) + : +- CometScan parquet spark_catalog.default.web_returns (50) + +- CometSort (120) + +- CometExchange (119) + +- CometFilter (118) + +- CometHashAggregate (117) + +- CometExchange (116) + +- CometHashAggregate (115) + +- CometHashAggregate (114) + +- CometExchange (113) + +- CometHashAggregate (112) + +- CometUnion (111) + :- CometProject (82) + : +- CometSortMergeJoin (81) + : :- CometSort (78) + : : +- CometExchange (77) + : : +- CometProject (76) + : : +- CometBroadcastHashJoin (75) + : : :- CometProject (71) + : : : +- CometBroadcastHashJoin (70) + : : : :- CometFilter (68) + : : : : +- CometScan parquet spark_catalog.default.catalog_sales (67) + : : : +- ReusedExchange (69) + : : +- CometBroadcastExchange (74) + : : +- CometFilter (73) + : : +- CometScan parquet spark_catalog.default.date_dim (72) + : +- CometSort (80) + : +- ReusedExchange (79) + :- CometProject (96) + : +- CometSortMergeJoin (95) + : :- CometSort (92) + : : +- CometExchange (91) + : : +- CometProject (90) + : : +- CometBroadcastHashJoin (89) + : : :- CometProject (87) + : : : +- CometBroadcastHashJoin (86) + : : : :- CometFilter (84) + : : : : +- CometScan parquet spark_catalog.default.store_sales (83) + : : : +- ReusedExchange (85) + : : +- ReusedExchange (88) + : +- CometSort (94) + : +- ReusedExchange (93) + +- CometProject (110) + +- CometSortMergeJoin (109) + :- CometSort (106) + : +- CometExchange (105) + : +- CometProject (104) + : +- CometBroadcastHashJoin (103) + : :- CometProject (101) + : : +- CometBroadcastHashJoin (100) + : : :- CometFilter (98) + : : : +- CometScan parquet spark_catalog.default.web_sales (97) + : : +- ReusedExchange (99) + : +- ReusedExchange (102) + +- CometSort (108) + +- ReusedExchange (107) + + +(1) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books ), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(5) CometProject +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(6) CometBroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(7) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Right output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(8) CometProject +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(9) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) CometFilter +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(11) CometBroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: [d_date_sk#12, d_year#13] + +(12) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right output [2]: [d_date_sk#12, d_year#13] +Arguments: [cs_sold_date_sk#5], [d_date_sk#12], Inner, BuildRight + +(13) CometProject +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] + +(14) CometExchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(15) CometSort +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13], [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST] + +(16) CometScan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(17) CometFilter +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(18) CometProject +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(19) CometExchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(20) CometSort +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17], [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST] + +(21) CometSortMergeJoin +Left output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Right output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cs_order_number#2, cs_item_sk#1], [cr_order_number#15, cr_item_sk#14], LeftOuter + +(22) CometProject +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20], [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] + +(23) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#25)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(24) CometFilter +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : isnotnull(ss_item_sk#21) + +(25) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(26) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Right output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Arguments: [ss_item_sk#21], [i_item_sk#26], Inner, BuildRight + +(27) CometProject +Input [10]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30], [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(28) ReusedExchange [Reuses operator id: 11] +Output [2]: [d_date_sk#31, d_year#32] + +(29) CometBroadcastHashJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Right output [2]: [d_date_sk#31, d_year#32] +Arguments: [ss_sold_date_sk#25], [d_date_sk#31], Inner, BuildRight + +(30) CometProject +Input [11]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_date_sk#31, d_year#32] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32], [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] + +(31) CometExchange +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: hashpartitioning(ss_ticket_number#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32], [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST] + +(33) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(34) CometFilter +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Condition : (isnotnull(sr_ticket_number#34) AND isnotnull(sr_item_sk#33)) + +(35) CometProject +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Arguments: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36], [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] + +(36) CometExchange +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: hashpartitioning(sr_ticket_number#34, sr_item_sk#33, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(37) CometSort +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36], [sr_ticket_number#34 ASC NULLS FIRST, sr_item_sk#33 ASC NULLS FIRST] + +(38) CometSortMergeJoin +Left output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Right output [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [ss_ticket_number#22, ss_item_sk#21], [sr_ticket_number#34, sr_item_sk#33], LeftOuter + +(39) CometProject +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32, sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, sales_cnt#38, sales_amt#39], [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, (ss_quantity#23 - coalesce(sr_return_quantity#35, 0)) AS sales_cnt#38, (ss_ext_sales_price#24 - coalesce(sr_return_amt#36, 0.00)) AS sales_amt#39] + +(40) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#44)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) CometFilter +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_item_sk#40) + +(42) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(43) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Right output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Arguments: [ws_item_sk#40], [i_item_sk#45], Inner, BuildRight + +(44) CometProject +Input [10]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Arguments: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49], [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(45) ReusedExchange [Reuses operator id: 11] +Output [2]: [d_date_sk#50, d_year#51] + +(46) CometBroadcastHashJoin +Left output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Right output [2]: [d_date_sk#50, d_year#51] +Arguments: [ws_sold_date_sk#44], [d_date_sk#50], Inner, BuildRight + +(47) CometProject +Input [11]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_date_sk#50, d_year#51] +Arguments: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51], [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] + +(48) CometExchange +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: hashpartitioning(ws_order_number#41, ws_item_sk#40, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(49) CometSort +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51], [ws_order_number#41 ASC NULLS FIRST, ws_item_sk#40 ASC NULLS FIRST] + +(50) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(51) CometFilter +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Condition : (isnotnull(wr_order_number#53) AND isnotnull(wr_item_sk#52)) + +(52) CometProject +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Arguments: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55], [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] + +(53) CometExchange +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: hashpartitioning(wr_order_number#53, wr_item_sk#52, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(54) CometSort +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55], [wr_order_number#53 ASC NULLS FIRST, wr_item_sk#52 ASC NULLS FIRST] + +(55) CometSortMergeJoin +Left output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Right output [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [ws_order_number#41, ws_item_sk#40], [wr_order_number#53, wr_item_sk#52], LeftOuter + +(56) CometProject +Input [13]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51, wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, sales_cnt#57, sales_amt#58], [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, (ws_quantity#42 - coalesce(wr_return_quantity#54, 0)) AS sales_cnt#57, (ws_ext_sales_price#43 - coalesce(wr_return_amt#55, 0.00)) AS sales_amt#58] + +(57) CometUnion +Child 0 Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, sales_cnt#38, sales_amt#39] +Child 2 Input [7]: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, sales_cnt#57, sales_amt#58] + +(58) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(59) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(60) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] + +(61) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(62) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#59, sum#60] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(63) CometHashAggregate +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#59, sum#60] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(64) CometFilter +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Condition : isnotnull(sales_cnt#61) + +(65) CometExchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(66) CometSort +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Arguments: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62], [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST] + +(67) CometScan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#67)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(68) CometFilter +Input [5]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67] +Condition : isnotnull(cs_item_sk#63) + +(69) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] + +(70) CometBroadcastHashJoin +Left output [5]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67] +Right output [5]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Arguments: [cs_item_sk#63], [i_item_sk#68], Inner, BuildRight + +(71) CometProject +Input [10]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Arguments: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72], [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] + +(72) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#73, d_year#74] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(73) CometFilter +Input [2]: [d_date_sk#73, d_year#74] +Condition : ((isnotnull(d_year#74) AND (d_year#74 = 2001)) AND isnotnull(d_date_sk#73)) + +(74) CometBroadcastExchange +Input [2]: [d_date_sk#73, d_year#74] +Arguments: [d_date_sk#73, d_year#74] + +(75) CometBroadcastHashJoin +Left output [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Right output [2]: [d_date_sk#73, d_year#74] +Arguments: [cs_sold_date_sk#67], [d_date_sk#73], Inner, BuildRight + +(76) CometProject +Input [11]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, cs_sold_date_sk#67, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_date_sk#73, d_year#74] +Arguments: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74], [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] + +(77) CometExchange +Input [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] +Arguments: hashpartitioning(cs_order_number#64, cs_item_sk#63, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=10] + +(78) CometSort +Input [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] +Arguments: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74], [cs_order_number#64 ASC NULLS FIRST, cs_item_sk#63 ASC NULLS FIRST] + +(79) ReusedExchange [Reuses operator id: 19] +Output [4]: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] + +(80) CometSort +Input [4]: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] +Arguments: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78], [cr_order_number#76 ASC NULLS FIRST, cr_item_sk#75 ASC NULLS FIRST] + +(81) CometSortMergeJoin +Left output [9]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74] +Right output [4]: [cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] +Arguments: [cs_order_number#64, cs_item_sk#63], [cr_order_number#76, cr_item_sk#75], LeftOuter + +(82) CometProject +Input [13]: [cs_item_sk#63, cs_order_number#64, cs_quantity#65, cs_ext_sales_price#66, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, d_year#74, cr_item_sk#75, cr_order_number#76, cr_return_quantity#77, cr_return_amount#78] +Arguments: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20], [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, (cs_quantity#65 - coalesce(cr_return_quantity#77, 0)) AS sales_cnt#19, (cs_ext_sales_price#66 - coalesce(cr_return_amount#78, 0.00)) AS sales_amt#20] + +(83) CometScan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#83)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(84) CometFilter +Input [5]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83] +Condition : isnotnull(ss_item_sk#79) + +(85) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] + +(86) CometBroadcastHashJoin +Left output [5]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83] +Right output [5]: [i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Arguments: [ss_item_sk#79], [i_item_sk#84], Inner, BuildRight + +(87) CometProject +Input [10]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Arguments: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88], [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] + +(88) ReusedExchange [Reuses operator id: 74] +Output [2]: [d_date_sk#89, d_year#90] + +(89) CometBroadcastHashJoin +Left output [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Right output [2]: [d_date_sk#89, d_year#90] +Arguments: [ss_sold_date_sk#83], [d_date_sk#89], Inner, BuildRight + +(90) CometProject +Input [11]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, ss_sold_date_sk#83, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_date_sk#89, d_year#90] +Arguments: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90], [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] + +(91) CometExchange +Input [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] +Arguments: hashpartitioning(ss_ticket_number#80, ss_item_sk#79, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=11] + +(92) CometSort +Input [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] +Arguments: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90], [ss_ticket_number#80 ASC NULLS FIRST, ss_item_sk#79 ASC NULLS FIRST] + +(93) ReusedExchange [Reuses operator id: 36] +Output [4]: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] + +(94) CometSort +Input [4]: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] +Arguments: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94], [sr_ticket_number#92 ASC NULLS FIRST, sr_item_sk#91 ASC NULLS FIRST] + +(95) CometSortMergeJoin +Left output [9]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] +Right output [4]: [sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] +Arguments: [ss_ticket_number#80, ss_item_sk#79], [sr_ticket_number#92, sr_item_sk#91], LeftOuter + +(96) CometProject +Input [13]: [ss_item_sk#79, ss_ticket_number#80, ss_quantity#81, ss_ext_sales_price#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90, sr_item_sk#91, sr_ticket_number#92, sr_return_quantity#93, sr_return_amt#94] +Arguments: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#38, sales_amt#39], [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (ss_quantity#81 - coalesce(sr_return_quantity#93, 0)) AS sales_cnt#38, (ss_ext_sales_price#82 - coalesce(sr_return_amt#94, 0.00)) AS sales_amt#39] + +(97) CometScan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#99)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(98) CometFilter +Input [5]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99] +Condition : isnotnull(ws_item_sk#95) + +(99) ReusedExchange [Reuses operator id: 6] +Output [5]: [i_item_sk#100, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] + +(100) CometBroadcastHashJoin +Left output [5]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99] +Right output [5]: [i_item_sk#100, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] +Arguments: [ws_item_sk#95], [i_item_sk#100], Inner, BuildRight + +(101) CometProject +Input [10]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_item_sk#100, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] +Arguments: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104], [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] + +(102) ReusedExchange [Reuses operator id: 74] +Output [2]: [d_date_sk#105, d_year#106] + +(103) CometBroadcastHashJoin +Left output [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104] +Right output [2]: [d_date_sk#105, d_year#106] +Arguments: [ws_sold_date_sk#99], [d_date_sk#105], Inner, BuildRight + +(104) CometProject +Input [11]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, ws_sold_date_sk#99, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_date_sk#105, d_year#106] +Arguments: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106], [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] + +(105) CometExchange +Input [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] +Arguments: hashpartitioning(ws_order_number#96, ws_item_sk#95, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=12] + +(106) CometSort +Input [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] +Arguments: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106], [ws_order_number#96 ASC NULLS FIRST, ws_item_sk#95 ASC NULLS FIRST] + +(107) ReusedExchange [Reuses operator id: 53] +Output [4]: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] + +(108) CometSort +Input [4]: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] +Arguments: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110], [wr_order_number#108 ASC NULLS FIRST, wr_item_sk#107 ASC NULLS FIRST] + +(109) CometSortMergeJoin +Left output [9]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106] +Right output [4]: [wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] +Arguments: [ws_order_number#96, ws_item_sk#95], [wr_order_number#108, wr_item_sk#107], LeftOuter + +(110) CometProject +Input [13]: [ws_item_sk#95, ws_order_number#96, ws_quantity#97, ws_ext_sales_price#98, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, d_year#106, wr_item_sk#107, wr_order_number#108, wr_return_quantity#109, wr_return_amt#110] +Arguments: [d_year#106, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, sales_cnt#57, sales_amt#58], [d_year#106, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, (ws_quantity#97 - coalesce(wr_return_quantity#109, 0)) AS sales_cnt#57, (ws_ext_sales_price#98 - coalesce(wr_return_amt#110, 0.00)) AS sales_amt#58] + +(111) CometUnion +Child 0 Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Child 1 Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#38, sales_amt#39] +Child 2 Input [7]: [d_year#106, i_brand_id#101, i_class_id#102, i_category_id#103, i_manufact_id#104, sales_cnt#57, sales_amt#58] + +(112) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Functions: [] + +(113) CometExchange +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=13] + +(114) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Functions: [] + +(115) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] + +(116) CometExchange +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sum#59, sum#111] +Arguments: hashpartitioning(d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=14] + +(117) CometHashAggregate +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sum#59, sum#111] +Keys [5]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] + +(118) CometFilter +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Condition : isnotnull(sales_cnt#112) + +(119) CometExchange +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=15] + +(120) CometSort +Input [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113], [i_brand_id#69 ASC NULLS FIRST, i_class_id#70 ASC NULLS FIRST, i_category_id#71 ASC NULLS FIRST, i_manufact_id#72 ASC NULLS FIRST] + +(121) CometSortMergeJoin +Left output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62] +Right output [7]: [d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11], [i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72], Inner, ((cast(sales_cnt#61 as decimal(17,2)) / cast(sales_cnt#112 as decimal(17,2))) < 0.90000000000000000000) + +(122) CometProject +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#61, sales_amt#62, d_year#74, i_brand_id#69, i_class_id#70, i_category_id#71, i_manufact_id#72, sales_cnt#112, sales_amt#113] +Arguments: [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119], [d_year#74 AS prev_year#114, d_year#13 AS year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#112 AS prev_yr_cnt#116, sales_cnt#61 AS curr_yr_cnt#117, (sales_cnt#61 - sales_cnt#112) AS sales_cnt_diff#118, (sales_amt#62 - sales_amt#113) AS sales_amt_diff#119] + +(123) CometTakeOrderedAndProject +Input [10]: [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119] +Arguments: TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#118 ASC NULLS FIRST,sales_amt_diff#119 ASC NULLS FIRST], output=[prev_year#114,year#115,i_brand_id#7,i_class_id#8,i_category_id#9,i_manufact_id#11,prev_yr_cnt#116,curr_yr_cnt#117,sales_cnt_diff#118,sales_amt_diff#119]), [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119], 100, [sales_cnt_diff#118 ASC NULLS FIRST, sales_amt_diff#119 ASC NULLS FIRST], [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119] + +(124) ColumnarToRow [codegen id : 1] +Input [10]: [prev_year#114, year#115, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#116, curr_yr_cnt#117, sales_cnt_diff#118, sales_amt_diff#119] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..2c0b850895 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q75.native_iceberg_compat/simplified.txt @@ -0,0 +1,126 @@ +WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometTakeOrderedAndProject [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometProject [d_year,d_year,sales_cnt,sales_cnt,sales_amt,sales_amt] [prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_cnt_diff,sales_amt_diff] + CometSortMergeJoin [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #4 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometProject [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometScan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + CometBroadcastExchange [d_date_sk,d_year] #6 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometExchange [cr_order_number,cr_item_sk] #7 + CometProject [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometFilter [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ss_ticket_number,ss_item_sk] #8 + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #6 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometExchange [sr_ticket_number,sr_item_sk] #9 + CometProject [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ws_order_number,ws_item_sk] #10 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #6 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometExchange [wr_order_number,wr_item_sk] #11 + CometProject [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometFilter [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + CometSort [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + CometFilter [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt,sum,sum,sum(sales_cnt),sum(UnscaledValue(sales_amt))] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum,sales_cnt,sales_amt] + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometExchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + CometHashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometUnion [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometProject [cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + CometSort [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [cs_order_number,cs_item_sk] #15 + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + CometBroadcastExchange [d_date_sk,d_year] #16 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometSort [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + CometProject [ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + CometSort [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ss_ticket_number,ss_item_sk] #17 + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #16 + CometSort [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + CometProject [ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + CometSortMergeJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year,wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + CometSort [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometExchange [ws_order_number,ws_item_sk] #18 + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_date_sk,d_year] + CometProject [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometBroadcastHashJoin [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + CometFilter [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + ReusedExchange [d_date_sk,d_year] #16 + CometSort [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/explain.txt new file mode 100644 index 0000000000..cfbc95b886 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/explain.txt @@ -0,0 +1,362 @@ +== Physical Plan == +TakeOrderedAndProject (66) ++- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- Union (62) + :- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- Union (48) + : :- * ColumnarToRow (22) + : : +- CometProject (21) + : : +- CometBroadcastHashJoin (20) + : : :- CometHashAggregate (16) + : : : +- CometExchange (15) + : : : +- CometHashAggregate (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (9) + : : +- CometBroadcastExchange (19) + : : +- CometHashAggregate (18) + : : +- ReusedExchange (17) + : :- * Project (41) + : : +- * BroadcastNestedLoopJoin Inner BuildLeft (40) + : : :- BroadcastExchange (31) + : : : +- * ColumnarToRow (30) + : : : +- CometHashAggregate (29) + : : : +- CometExchange (28) + : : : +- CometHashAggregate (27) + : : : +- CometProject (26) + : : : +- CometBroadcastHashJoin (25) + : : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (23) + : : : +- ReusedExchange (24) + : : +- * ColumnarToRow (39) + : : +- CometHashAggregate (38) + : : +- CometExchange (37) + : : +- CometHashAggregate (36) + : : +- CometProject (35) + : : +- CometBroadcastHashJoin (34) + : : :- CometNativeScan: `spark_catalog`.`default`.`catalog_returns` (32) + : : +- ReusedExchange (33) + : +- * ColumnarToRow (47) + : +- CometProject (46) + : +- CometBroadcastHashJoin (45) + : :- CometHashAggregate (43) + : : +- ReusedExchange (42) + : +- ReusedExchange (44) + :- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- * HashAggregate (53) + : +- ReusedExchange (52) + +- * HashAggregate (61) + +- Exchange (60) + +- * HashAggregate (59) + +- * HashAggregate (58) + +- ReusedExchange (57) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5, d_date#6] + +(4) CometFilter +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 1998-08-04)) AND (d_date#6 <= 1998-09-03)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3], [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] + +(9) CometNativeScan: `spark_catalog`.`default`.`store` +Output [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(10) CometFilter +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(11) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(12) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Arguments: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7], [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(14) CometHashAggregate +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] + +(15) CometExchange +Input [3]: [s_store_sk#7, sum#8, sum#9] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [s_store_sk#7, sum#8, sum#9] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] + +(17) ReusedExchange [Reuses operator id: 15] +Output [3]: [s_store_sk#10, sum#11, sum#12] + +(18) CometHashAggregate +Input [3]: [s_store_sk#10, sum#11, sum#12] +Keys [1]: [s_store_sk#10] +Functions [2]: [sum(UnscaledValue(sr_return_amt#13)), sum(UnscaledValue(sr_net_loss#14))] + +(19) CometBroadcastExchange +Input [3]: [s_store_sk#10, returns#15, profit_loss#16] +Arguments: [s_store_sk#10, returns#15, profit_loss#16] + +(20) CometBroadcastHashJoin +Left output [3]: [s_store_sk#7, sales#17, profit#18] +Right output [3]: [s_store_sk#10, returns#15, profit_loss#16] +Arguments: [s_store_sk#7], [s_store_sk#10], LeftOuter, BuildRight + +(21) CometProject +Input [6]: [s_store_sk#7, sales#17, profit#18, s_store_sk#10, returns#15, profit_loss#16] +Arguments: [channel#19, id#20, sales#17, returns#21, profit#22], [store channel AS channel#19, s_store_sk#7 AS id#20, sales#17, coalesce(returns#15, 0.00) AS returns#21, (profit#18 - coalesce(profit_loss#16, 0.00)) AS profit#22] + +(22) ColumnarToRow [codegen id : 1] +Input [5]: [channel#19, id#20, sales#17, returns#21, profit#22] + +(23) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [4]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26] +Arguments: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26] + +(24) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#27] + +(25) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26] +Right output [1]: [d_date_sk#27] +Arguments: [cs_sold_date_sk#26], [d_date_sk#27], Inner, BuildRight + +(26) CometProject +Input [5]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25, cs_sold_date_sk#26, d_date_sk#27] +Arguments: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25], [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25] + +(27) CometHashAggregate +Input [3]: [cs_call_center_sk#23, cs_ext_sales_price#24, cs_net_profit#25] +Keys [1]: [cs_call_center_sk#23] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#24)), partial_sum(UnscaledValue(cs_net_profit#25))] + +(28) CometExchange +Input [3]: [cs_call_center_sk#23, sum#28, sum#29] +Arguments: hashpartitioning(cs_call_center_sk#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(29) CometHashAggregate +Input [3]: [cs_call_center_sk#23, sum#28, sum#29] +Keys [1]: [cs_call_center_sk#23] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#24)), sum(UnscaledValue(cs_net_profit#25))] + +(30) ColumnarToRow [codegen id : 2] +Input [3]: [cs_call_center_sk#23, sales#30, profit#31] + +(31) BroadcastExchange +Input [3]: [cs_call_center_sk#23, sales#30, profit#31] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(32) CometNativeScan: `spark_catalog`.`default`.`catalog_returns` +Output [3]: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34] +Arguments: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34] + +(33) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#35] + +(34) CometBroadcastHashJoin +Left output [3]: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34] +Right output [1]: [d_date_sk#35] +Arguments: [cr_returned_date_sk#34], [d_date_sk#35], Inner, BuildRight + +(35) CometProject +Input [4]: [cr_return_amount#32, cr_net_loss#33, cr_returned_date_sk#34, d_date_sk#35] +Arguments: [cr_return_amount#32, cr_net_loss#33], [cr_return_amount#32, cr_net_loss#33] + +(36) CometHashAggregate +Input [2]: [cr_return_amount#32, cr_net_loss#33] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#32)), partial_sum(UnscaledValue(cr_net_loss#33))] + +(37) CometExchange +Input [2]: [sum#36, sum#37] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(38) CometHashAggregate +Input [2]: [sum#36, sum#37] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#32)), sum(UnscaledValue(cr_net_loss#33))] + +(39) ColumnarToRow +Input [2]: [returns#38, profit_loss#39] + +(40) BroadcastNestedLoopJoin [codegen id : 3] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 3] +Output [5]: [catalog channel AS channel#40, cs_call_center_sk#23 AS id#41, sales#30, returns#38, (profit#31 - profit_loss#39) AS profit#42] +Input [5]: [cs_call_center_sk#23, sales#30, profit#31, returns#38, profit_loss#39] + +(42) ReusedExchange [Reuses operator id: 15] +Output [3]: [wp_web_page_sk#43, sum#44, sum#45] + +(43) CometHashAggregate +Input [3]: [wp_web_page_sk#43, sum#44, sum#45] +Keys [1]: [wp_web_page_sk#43] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#46)), sum(UnscaledValue(ws_net_profit#47))] + +(44) ReusedExchange [Reuses operator id: 19] +Output [3]: [wp_web_page_sk#48, returns#49, profit_loss#50] + +(45) CometBroadcastHashJoin +Left output [3]: [wp_web_page_sk#43, sales#51, profit#52] +Right output [3]: [wp_web_page_sk#48, returns#49, profit_loss#50] +Arguments: [wp_web_page_sk#43], [wp_web_page_sk#48], LeftOuter, BuildRight + +(46) CometProject +Input [6]: [wp_web_page_sk#43, sales#51, profit#52, wp_web_page_sk#48, returns#49, profit_loss#50] +Arguments: [channel#53, id#54, sales#51, returns#55, profit#56], [web channel AS channel#53, wp_web_page_sk#43 AS id#54, sales#51, coalesce(returns#49, 0.00) AS returns#55, (profit#52 - coalesce(profit_loss#50, 0.00)) AS profit#56] + +(47) ColumnarToRow [codegen id : 4] +Input [5]: [channel#53, id#54, sales#51, returns#55, profit#56] + +(48) Union + +(49) HashAggregate [codegen id : 5] +Input [5]: [channel#19, id#20, sales#17, returns#21, profit#22] +Keys [2]: [channel#19, id#20] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#21), partial_sum(profit#22)] +Aggregate Attributes [6]: [sum#57, isEmpty#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Results [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(50) Exchange +Input [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Arguments: hashpartitioning(channel#19, id#20, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(51) HashAggregate [codegen id : 6] +Input [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [2]: [channel#19, id#20] +Functions [3]: [sum(sales#17), sum(returns#21), sum(profit#22)] +Aggregate Attributes [3]: [sum(sales#17)#69, sum(returns#21)#70, sum(profit#22)#71] +Results [5]: [channel#19, id#20, cast(sum(sales#17)#69 as decimal(37,2)) AS sales#72, cast(sum(returns#21)#70 as decimal(37,2)) AS returns#73, cast(sum(profit#22)#71 as decimal(38,2)) AS profit#74] + +(52) ReusedExchange [Reuses operator id: 50] +Output [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(53) HashAggregate [codegen id : 12] +Input [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [2]: [channel#19, id#20] +Functions [3]: [sum(sales#17), sum(returns#21), sum(profit#22)] +Aggregate Attributes [3]: [sum(sales#17)#69, sum(returns#21)#70, sum(profit#22)#71] +Results [4]: [channel#19, sum(sales#17)#69 AS sales#75, sum(returns#21)#70 AS returns#76, sum(profit#22)#71 AS profit#77] + +(54) HashAggregate [codegen id : 12] +Input [4]: [channel#19, sales#75, returns#76, profit#77] +Keys [1]: [channel#19] +Functions [3]: [partial_sum(sales#75), partial_sum(returns#76), partial_sum(profit#77)] +Aggregate Attributes [6]: [sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83] +Results [7]: [channel#19, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89] + +(55) Exchange +Input [7]: [channel#19, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89] +Arguments: hashpartitioning(channel#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(56) HashAggregate [codegen id : 13] +Input [7]: [channel#19, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89] +Keys [1]: [channel#19] +Functions [3]: [sum(sales#75), sum(returns#76), sum(profit#77)] +Aggregate Attributes [3]: [sum(sales#75)#90, sum(returns#76)#91, sum(profit#77)#92] +Results [5]: [channel#19, null AS id#93, sum(sales#75)#90 AS sales#94, sum(returns#76)#91 AS returns#95, sum(profit#77)#92 AS profit#96] + +(57) ReusedExchange [Reuses operator id: 50] +Output [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(58) HashAggregate [codegen id : 19] +Input [8]: [channel#19, id#20, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [2]: [channel#19, id#20] +Functions [3]: [sum(sales#17), sum(returns#21), sum(profit#22)] +Aggregate Attributes [3]: [sum(sales#17)#69, sum(returns#21)#70, sum(profit#22)#71] +Results [3]: [sum(sales#17)#69 AS sales#97, sum(returns#21)#70 AS returns#98, sum(profit#22)#71 AS profit#99] + +(59) HashAggregate [codegen id : 19] +Input [3]: [sales#97, returns#98, profit#99] +Keys: [] +Functions [3]: [partial_sum(sales#97), partial_sum(returns#98), partial_sum(profit#99)] +Aggregate Attributes [6]: [sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Results [6]: [sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111] + +(60) Exchange +Input [6]: [sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(61) HashAggregate [codegen id : 20] +Input [6]: [sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111] +Keys: [] +Functions [3]: [sum(sales#97), sum(returns#98), sum(profit#99)] +Aggregate Attributes [3]: [sum(sales#97)#112, sum(returns#98)#113, sum(profit#99)#114] +Results [5]: [null AS channel#115, null AS id#116, sum(sales#97)#112 AS sales#117, sum(returns#98)#113 AS returns#118, sum(profit#99)#114 AS profit#119] + +(62) Union + +(63) HashAggregate [codegen id : 21] +Input [5]: [channel#19, id#20, sales#72, returns#73, profit#74] +Keys [5]: [channel#19, id#20, sales#72, returns#73, profit#74] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#19, id#20, sales#72, returns#73, profit#74] + +(64) Exchange +Input [5]: [channel#19, id#20, sales#72, returns#73, profit#74] +Arguments: hashpartitioning(channel#19, id#20, sales#72, returns#73, profit#74, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(65) HashAggregate [codegen id : 22] +Input [5]: [channel#19, id#20, sales#72, returns#73, profit#74] +Keys [5]: [channel#19, id#20, sales#72, returns#73, profit#74] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#19, id#20, sales#72, returns#73, profit#74] + +(66) TakeOrderedAndProject +Input [5]: [channel#19, id#20, sales#72, returns#73, profit#74] +Arguments: 100, [channel#19 ASC NULLS FIRST, id#20 ASC NULLS FIRST], [channel#19, id#20, sales#72, returns#73, profit#74] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1a5f29f9f3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_datafusion/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (22) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (21) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (5) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [s_store_sk,returns,profit,profit_loss] [channel,id,sales,returns,profit] + CometBroadcastHashJoin [s_store_sk,sales,profit,s_store_sk,returns,profit_loss] + CometHashAggregate [s_store_sk,sales,profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + CometExchange [s_store_sk] #3 + CometHashAggregate [s_store_sk,sum,sum,ss_ext_sales_price,ss_net_profit] + CometProject [ss_ext_sales_price,ss_net_profit,s_store_sk] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk] + CometBroadcastExchange [s_store_sk,returns,profit_loss] #6 + CometHashAggregate [s_store_sk,returns,profit_loss,sum,sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss))] + ReusedExchange [s_store_sk,sum,sum] #3 + WholeStageCodegen (3) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [cs_call_center_sk,sales,profit,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + CometExchange [cs_call_center_sk] #8 + CometHashAggregate [cs_call_center_sk,sum,sum,cs_ext_sales_price,cs_net_profit] + CometProject [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + ReusedExchange [d_date_sk] #4 + ColumnarToRow + InputAdapter + CometHashAggregate [returns,profit_loss,sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] + CometExchange #9 + CometHashAggregate [sum,sum,cr_return_amount,cr_net_loss] + CometProject [cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cr_return_amount,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_returns` [cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometProject [wp_web_page_sk,returns,profit,profit_loss] [channel,id,sales,returns,profit] + CometBroadcastHashJoin [wp_web_page_sk,sales,profit,wp_web_page_sk,returns,profit_loss] + CometHashAggregate [wp_web_page_sk,sales,profit,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + ReusedExchange [wp_web_page_sk,sum,sum] #3 + ReusedExchange [wp_web_page_sk,returns,profit_loss] #6 + WholeStageCodegen (13) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #10 + WholeStageCodegen (12) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (20) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #11 + WholeStageCodegen (19) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..23d9166530 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/explain.txt @@ -0,0 +1,556 @@ +== Physical Plan == +TakeOrderedAndProject (97) ++- * HashAggregate (96) + +- Exchange (95) + +- * HashAggregate (94) + +- Union (93) + :- * HashAggregate (82) + : +- Exchange (81) + : +- * HashAggregate (80) + : +- Union (79) + : :- * ColumnarToRow (31) + : : +- CometProject (30) + : : +- CometBroadcastHashJoin (29) + : : :- CometHashAggregate (16) + : : : +- CometExchange (15) + : : : +- CometHashAggregate (14) + : : : +- CometProject (13) + : : : +- CometBroadcastHashJoin (12) + : : : :- CometProject (8) + : : : : +- CometBroadcastHashJoin (7) + : : : : :- CometFilter (2) + : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : +- CometBroadcastExchange (6) + : : : : +- CometProject (5) + : : : : +- CometFilter (4) + : : : : +- CometScan parquet spark_catalog.default.date_dim (3) + : : : +- CometBroadcastExchange (11) + : : : +- CometFilter (10) + : : : +- CometScan parquet spark_catalog.default.store (9) + : : +- CometBroadcastExchange (28) + : : +- CometHashAggregate (27) + : : +- CometExchange (26) + : : +- CometHashAggregate (25) + : : +- CometProject (24) + : : +- CometBroadcastHashJoin (23) + : : :- CometProject (21) + : : : +- CometBroadcastHashJoin (20) + : : : :- CometFilter (18) + : : : : +- CometScan parquet spark_catalog.default.store_returns (17) + : : : +- ReusedExchange (19) + : : +- ReusedExchange (22) + : :- * Project (50) + : : +- * BroadcastNestedLoopJoin Inner BuildLeft (49) + : : :- BroadcastExchange (40) + : : : +- * ColumnarToRow (39) + : : : +- CometHashAggregate (38) + : : : +- CometExchange (37) + : : : +- CometHashAggregate (36) + : : : +- CometProject (35) + : : : +- CometBroadcastHashJoin (34) + : : : :- CometScan parquet spark_catalog.default.catalog_sales (32) + : : : +- ReusedExchange (33) + : : +- * ColumnarToRow (48) + : : +- CometHashAggregate (47) + : : +- CometExchange (46) + : : +- CometHashAggregate (45) + : : +- CometProject (44) + : : +- CometBroadcastHashJoin (43) + : : :- CometScan parquet spark_catalog.default.catalog_returns (41) + : : +- ReusedExchange (42) + : +- * ColumnarToRow (78) + : +- CometProject (77) + : +- CometBroadcastHashJoin (76) + : :- CometHashAggregate (63) + : : +- CometExchange (62) + : : +- CometHashAggregate (61) + : : +- CometProject (60) + : : +- CometBroadcastHashJoin (59) + : : :- CometProject (55) + : : : +- CometBroadcastHashJoin (54) + : : : :- CometFilter (52) + : : : : +- CometScan parquet spark_catalog.default.web_sales (51) + : : : +- ReusedExchange (53) + : : +- CometBroadcastExchange (58) + : : +- CometFilter (57) + : : +- CometScan parquet spark_catalog.default.web_page (56) + : +- CometBroadcastExchange (75) + : +- CometHashAggregate (74) + : +- CometExchange (73) + : +- CometHashAggregate (72) + : +- CometProject (71) + : +- CometBroadcastHashJoin (70) + : :- CometProject (68) + : : +- CometBroadcastHashJoin (67) + : : :- CometFilter (65) + : : : +- CometScan parquet spark_catalog.default.web_returns (64) + : : +- ReusedExchange (66) + : +- ReusedExchange (69) + :- * HashAggregate (87) + : +- Exchange (86) + : +- * HashAggregate (85) + : +- * HashAggregate (84) + : +- ReusedExchange (83) + +- * HashAggregate (92) + +- Exchange (91) + +- * HashAggregate (90) + +- * HashAggregate (89) + +- ReusedExchange (88) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) CometFilter +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 1998-08-04)) AND (d_date#6 <= 1998-09-03)) AND isnotnull(d_date_sk#5)) + +(5) CometProject +Input [2]: [d_date_sk#5, d_date#6] +Arguments: [d_date_sk#5], [d_date_sk#5] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: [d_date_sk#5] + +(7) CometBroadcastHashJoin +Left output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Right output [1]: [d_date_sk#5] +Arguments: [ss_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + +(8) CometProject +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] +Arguments: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3], [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] + +(9) CometScan parquet spark_catalog.default.store +Output [1]: [s_store_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(10) CometFilter +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(11) CometBroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: [s_store_sk#7] + +(12) CometBroadcastHashJoin +Left output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Right output [1]: [s_store_sk#7] +Arguments: [ss_store_sk#1], [s_store_sk#7], Inner, BuildRight + +(13) CometProject +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Arguments: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7], [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(14) CometHashAggregate +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] + +(15) CometExchange +Input [3]: [s_store_sk#7, sum#8, sum#9] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [s_store_sk#7, sum#8, sum#9] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] + +(17) CometScan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#13)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(18) CometFilter +Input [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Condition : isnotnull(sr_store_sk#10) + +(19) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#14] + +(20) CometBroadcastHashJoin +Left output [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Right output [1]: [d_date_sk#14] +Arguments: [sr_returned_date_sk#13], [d_date_sk#14], Inner, BuildRight + +(21) CometProject +Input [5]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13, d_date_sk#14] +Arguments: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12], [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12] + +(22) ReusedExchange [Reuses operator id: 11] +Output [1]: [s_store_sk#15] + +(23) CometBroadcastHashJoin +Left output [3]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12] +Right output [1]: [s_store_sk#15] +Arguments: [sr_store_sk#10], [s_store_sk#15], Inner, BuildRight + +(24) CometProject +Input [4]: [sr_store_sk#10, sr_return_amt#11, sr_net_loss#12, s_store_sk#15] +Arguments: [sr_return_amt#11, sr_net_loss#12, s_store_sk#15], [sr_return_amt#11, sr_net_loss#12, s_store_sk#15] + +(25) CometHashAggregate +Input [3]: [sr_return_amt#11, sr_net_loss#12, s_store_sk#15] +Keys [1]: [s_store_sk#15] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#11)), partial_sum(UnscaledValue(sr_net_loss#12))] + +(26) CometExchange +Input [3]: [s_store_sk#15, sum#16, sum#17] +Arguments: hashpartitioning(s_store_sk#15, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(27) CometHashAggregate +Input [3]: [s_store_sk#15, sum#16, sum#17] +Keys [1]: [s_store_sk#15] +Functions [2]: [sum(UnscaledValue(sr_return_amt#11)), sum(UnscaledValue(sr_net_loss#12))] + +(28) CometBroadcastExchange +Input [3]: [s_store_sk#15, returns#18, profit_loss#19] +Arguments: [s_store_sk#15, returns#18, profit_loss#19] + +(29) CometBroadcastHashJoin +Left output [3]: [s_store_sk#7, sales#20, profit#21] +Right output [3]: [s_store_sk#15, returns#18, profit_loss#19] +Arguments: [s_store_sk#7], [s_store_sk#15], LeftOuter, BuildRight + +(30) CometProject +Input [6]: [s_store_sk#7, sales#20, profit#21, s_store_sk#15, returns#18, profit_loss#19] +Arguments: [channel#22, id#23, sales#20, returns#24, profit#25], [store channel AS channel#22, s_store_sk#7 AS id#23, sales#20, coalesce(returns#18, 0.00) AS returns#24, (profit#21 - coalesce(profit_loss#19, 0.00)) AS profit#25] + +(31) ColumnarToRow [codegen id : 1] +Input [5]: [channel#22, id#23, sales#20, returns#24, profit#25] + +(32) CometScan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28, cs_sold_date_sk#29] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#29)] +ReadSchema: struct + +(33) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#30] + +(34) CometBroadcastHashJoin +Left output [4]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28, cs_sold_date_sk#29] +Right output [1]: [d_date_sk#30] +Arguments: [cs_sold_date_sk#29], [d_date_sk#30], Inner, BuildRight + +(35) CometProject +Input [5]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28, cs_sold_date_sk#29, d_date_sk#30] +Arguments: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28], [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28] + +(36) CometHashAggregate +Input [3]: [cs_call_center_sk#26, cs_ext_sales_price#27, cs_net_profit#28] +Keys [1]: [cs_call_center_sk#26] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#27)), partial_sum(UnscaledValue(cs_net_profit#28))] + +(37) CometExchange +Input [3]: [cs_call_center_sk#26, sum#31, sum#32] +Arguments: hashpartitioning(cs_call_center_sk#26, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(38) CometHashAggregate +Input [3]: [cs_call_center_sk#26, sum#31, sum#32] +Keys [1]: [cs_call_center_sk#26] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#27)), sum(UnscaledValue(cs_net_profit#28))] + +(39) ColumnarToRow [codegen id : 2] +Input [3]: [cs_call_center_sk#26, sales#33, profit#34] + +(40) BroadcastExchange +Input [3]: [cs_call_center_sk#26, sales#33, profit#34] +Arguments: IdentityBroadcastMode, [plan_id=4] + +(41) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_return_amount#35, cr_net_loss#36, cr_returned_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#37)] +ReadSchema: struct + +(42) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#38] + +(43) CometBroadcastHashJoin +Left output [3]: [cr_return_amount#35, cr_net_loss#36, cr_returned_date_sk#37] +Right output [1]: [d_date_sk#38] +Arguments: [cr_returned_date_sk#37], [d_date_sk#38], Inner, BuildRight + +(44) CometProject +Input [4]: [cr_return_amount#35, cr_net_loss#36, cr_returned_date_sk#37, d_date_sk#38] +Arguments: [cr_return_amount#35, cr_net_loss#36], [cr_return_amount#35, cr_net_loss#36] + +(45) CometHashAggregate +Input [2]: [cr_return_amount#35, cr_net_loss#36] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#35)), partial_sum(UnscaledValue(cr_net_loss#36))] + +(46) CometExchange +Input [2]: [sum#39, sum#40] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(47) CometHashAggregate +Input [2]: [sum#39, sum#40] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#35)), sum(UnscaledValue(cr_net_loss#36))] + +(48) ColumnarToRow +Input [2]: [returns#41, profit_loss#42] + +(49) BroadcastNestedLoopJoin [codegen id : 3] +Join type: Inner +Join condition: None + +(50) Project [codegen id : 3] +Output [5]: [catalog channel AS channel#43, cs_call_center_sk#26 AS id#44, sales#33, returns#41, (profit#34 - profit_loss#42) AS profit#45] +Input [5]: [cs_call_center_sk#26, sales#33, profit#34, returns#41, profit_loss#42] + +(51) CometScan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#49)] +PushedFilters: [IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(52) CometFilter +Input [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49] +Condition : isnotnull(ws_web_page_sk#46) + +(53) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#50] + +(54) CometBroadcastHashJoin +Left output [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49] +Right output [1]: [d_date_sk#50] +Arguments: [ws_sold_date_sk#49], [d_date_sk#50], Inner, BuildRight + +(55) CometProject +Input [5]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, ws_sold_date_sk#49, d_date_sk#50] +Arguments: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48], [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48] + +(56) CometScan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(57) CometFilter +Input [1]: [wp_web_page_sk#51] +Condition : isnotnull(wp_web_page_sk#51) + +(58) CometBroadcastExchange +Input [1]: [wp_web_page_sk#51] +Arguments: [wp_web_page_sk#51] + +(59) CometBroadcastHashJoin +Left output [3]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48] +Right output [1]: [wp_web_page_sk#51] +Arguments: [ws_web_page_sk#46], [wp_web_page_sk#51], Inner, BuildRight + +(60) CometProject +Input [4]: [ws_web_page_sk#46, ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51] +Arguments: [ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51], [ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51] + +(61) CometHashAggregate +Input [3]: [ws_ext_sales_price#47, ws_net_profit#48, wp_web_page_sk#51] +Keys [1]: [wp_web_page_sk#51] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#47)), partial_sum(UnscaledValue(ws_net_profit#48))] + +(62) CometExchange +Input [3]: [wp_web_page_sk#51, sum#52, sum#53] +Arguments: hashpartitioning(wp_web_page_sk#51, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(63) CometHashAggregate +Input [3]: [wp_web_page_sk#51, sum#52, sum#53] +Keys [1]: [wp_web_page_sk#51] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#47)), sum(UnscaledValue(ws_net_profit#48))] + +(64) CometScan parquet spark_catalog.default.web_returns +Output [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#57)] +PushedFilters: [IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(65) CometFilter +Input [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57] +Condition : isnotnull(wr_web_page_sk#54) + +(66) ReusedExchange [Reuses operator id: 6] +Output [1]: [d_date_sk#58] + +(67) CometBroadcastHashJoin +Left output [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57] +Right output [1]: [d_date_sk#58] +Arguments: [wr_returned_date_sk#57], [d_date_sk#58], Inner, BuildRight + +(68) CometProject +Input [5]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wr_returned_date_sk#57, d_date_sk#58] +Arguments: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56], [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56] + +(69) ReusedExchange [Reuses operator id: 58] +Output [1]: [wp_web_page_sk#59] + +(70) CometBroadcastHashJoin +Left output [3]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56] +Right output [1]: [wp_web_page_sk#59] +Arguments: [wr_web_page_sk#54], [wp_web_page_sk#59], Inner, BuildRight + +(71) CometProject +Input [4]: [wr_web_page_sk#54, wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59] +Arguments: [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59], [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59] + +(72) CometHashAggregate +Input [3]: [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#59] +Keys [1]: [wp_web_page_sk#59] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))] + +(73) CometExchange +Input [3]: [wp_web_page_sk#59, sum#60, sum#61] +Arguments: hashpartitioning(wp_web_page_sk#59, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(74) CometHashAggregate +Input [3]: [wp_web_page_sk#59, sum#60, sum#61] +Keys [1]: [wp_web_page_sk#59] +Functions [2]: [sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))] + +(75) CometBroadcastExchange +Input [3]: [wp_web_page_sk#59, returns#62, profit_loss#63] +Arguments: [wp_web_page_sk#59, returns#62, profit_loss#63] + +(76) CometBroadcastHashJoin +Left output [3]: [wp_web_page_sk#51, sales#64, profit#65] +Right output [3]: [wp_web_page_sk#59, returns#62, profit_loss#63] +Arguments: [wp_web_page_sk#51], [wp_web_page_sk#59], LeftOuter, BuildRight + +(77) CometProject +Input [6]: [wp_web_page_sk#51, sales#64, profit#65, wp_web_page_sk#59, returns#62, profit_loss#63] +Arguments: [channel#66, id#67, sales#64, returns#68, profit#69], [web channel AS channel#66, wp_web_page_sk#51 AS id#67, sales#64, coalesce(returns#62, 0.00) AS returns#68, (profit#65 - coalesce(profit_loss#63, 0.00)) AS profit#69] + +(78) ColumnarToRow [codegen id : 4] +Input [5]: [channel#66, id#67, sales#64, returns#68, profit#69] + +(79) Union + +(80) HashAggregate [codegen id : 5] +Input [5]: [channel#22, id#23, sales#20, returns#24, profit#25] +Keys [2]: [channel#22, id#23] +Functions [3]: [partial_sum(sales#20), partial_sum(returns#24), partial_sum(profit#25)] +Aggregate Attributes [6]: [sum#70, isEmpty#71, sum#72, isEmpty#73, sum#74, isEmpty#75] +Results [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] + +(81) Exchange +Input [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] +Arguments: hashpartitioning(channel#22, id#23, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(82) HashAggregate [codegen id : 6] +Input [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] +Keys [2]: [channel#22, id#23] +Functions [3]: [sum(sales#20), sum(returns#24), sum(profit#25)] +Aggregate Attributes [3]: [sum(sales#20)#82, sum(returns#24)#83, sum(profit#25)#84] +Results [5]: [channel#22, id#23, cast(sum(sales#20)#82 as decimal(37,2)) AS sales#85, cast(sum(returns#24)#83 as decimal(37,2)) AS returns#86, cast(sum(profit#25)#84 as decimal(38,2)) AS profit#87] + +(83) ReusedExchange [Reuses operator id: 81] +Output [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] + +(84) HashAggregate [codegen id : 12] +Input [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] +Keys [2]: [channel#22, id#23] +Functions [3]: [sum(sales#20), sum(returns#24), sum(profit#25)] +Aggregate Attributes [3]: [sum(sales#20)#82, sum(returns#24)#83, sum(profit#25)#84] +Results [4]: [channel#22, sum(sales#20)#82 AS sales#88, sum(returns#24)#83 AS returns#89, sum(profit#25)#84 AS profit#90] + +(85) HashAggregate [codegen id : 12] +Input [4]: [channel#22, sales#88, returns#89, profit#90] +Keys [1]: [channel#22] +Functions [3]: [partial_sum(sales#88), partial_sum(returns#89), partial_sum(profit#90)] +Aggregate Attributes [6]: [sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96] +Results [7]: [channel#22, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] + +(86) Exchange +Input [7]: [channel#22, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Arguments: hashpartitioning(channel#22, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(87) HashAggregate [codegen id : 13] +Input [7]: [channel#22, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Keys [1]: [channel#22] +Functions [3]: [sum(sales#88), sum(returns#89), sum(profit#90)] +Aggregate Attributes [3]: [sum(sales#88)#103, sum(returns#89)#104, sum(profit#90)#105] +Results [5]: [channel#22, null AS id#106, sum(sales#88)#103 AS sales#107, sum(returns#89)#104 AS returns#108, sum(profit#90)#105 AS profit#109] + +(88) ReusedExchange [Reuses operator id: 81] +Output [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] + +(89) HashAggregate [codegen id : 19] +Input [8]: [channel#22, id#23, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81] +Keys [2]: [channel#22, id#23] +Functions [3]: [sum(sales#20), sum(returns#24), sum(profit#25)] +Aggregate Attributes [3]: [sum(sales#20)#82, sum(returns#24)#83, sum(profit#25)#84] +Results [3]: [sum(sales#20)#82 AS sales#110, sum(returns#24)#83 AS returns#111, sum(profit#25)#84 AS profit#112] + +(90) HashAggregate [codegen id : 19] +Input [3]: [sales#110, returns#111, profit#112] +Keys: [] +Functions [3]: [partial_sum(sales#110), partial_sum(returns#111), partial_sum(profit#112)] +Aggregate Attributes [6]: [sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] +Results [6]: [sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124] + +(91) Exchange +Input [6]: [sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(92) HashAggregate [codegen id : 20] +Input [6]: [sum#119, isEmpty#120, sum#121, isEmpty#122, sum#123, isEmpty#124] +Keys: [] +Functions [3]: [sum(sales#110), sum(returns#111), sum(profit#112)] +Aggregate Attributes [3]: [sum(sales#110)#125, sum(returns#111)#126, sum(profit#112)#127] +Results [5]: [null AS channel#128, null AS id#129, sum(sales#110)#125 AS sales#130, sum(returns#111)#126 AS returns#131, sum(profit#112)#127 AS profit#132] + +(93) Union + +(94) HashAggregate [codegen id : 21] +Input [5]: [channel#22, id#23, sales#85, returns#86, profit#87] +Keys [5]: [channel#22, id#23, sales#85, returns#86, profit#87] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#22, id#23, sales#85, returns#86, profit#87] + +(95) Exchange +Input [5]: [channel#22, id#23, sales#85, returns#86, profit#87] +Arguments: hashpartitioning(channel#22, id#23, sales#85, returns#86, profit#87, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(96) HashAggregate [codegen id : 22] +Input [5]: [channel#22, id#23, sales#85, returns#86, profit#87] +Keys [5]: [channel#22, id#23, sales#85, returns#86, profit#87] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#22, id#23, sales#85, returns#86, profit#87] + +(97) TakeOrderedAndProject +Input [5]: [channel#22, id#23, sales#85, returns#86, profit#87] +Arguments: 100, [channel#22 ASC NULLS FIRST, id#23 ASC NULLS FIRST], [channel#22, id#23, sales#85, returns#86, profit#87] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..20f766cf82 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q77a.native_iceberg_compat/simplified.txt @@ -0,0 +1,122 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (22) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (21) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (5) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometProject [s_store_sk,returns,profit,profit_loss] [channel,id,sales,returns,profit] + CometBroadcastHashJoin [s_store_sk,sales,profit,s_store_sk,returns,profit_loss] + CometHashAggregate [s_store_sk,sales,profit,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + CometExchange [s_store_sk] #3 + CometHashAggregate [s_store_sk,sum,sum,ss_ext_sales_price,ss_net_profit] + CometProject [ss_ext_sales_price,ss_net_profit,s_store_sk] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,s_store_sk] + CometProject [ss_store_sk,ss_ext_sales_price,ss_net_profit] + CometBroadcastHashJoin [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,d_date_sk] + CometFilter [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk] #5 + CometFilter [s_store_sk] + CometScan parquet spark_catalog.default.store [s_store_sk] + CometBroadcastExchange [s_store_sk,returns,profit_loss] #6 + CometHashAggregate [s_store_sk,returns,profit_loss,sum,sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss))] + CometExchange [s_store_sk] #7 + CometHashAggregate [s_store_sk,sum,sum,sr_return_amt,sr_net_loss] + CometProject [sr_return_amt,sr_net_loss,s_store_sk] + CometBroadcastHashJoin [sr_store_sk,sr_return_amt,sr_net_loss,s_store_sk] + CometProject [sr_store_sk,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk,d_date_sk] + CometFilter [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + ReusedExchange [d_date_sk] #4 + ReusedExchange [s_store_sk] #5 + WholeStageCodegen (3) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + CometHashAggregate [cs_call_center_sk,sales,profit,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + CometExchange [cs_call_center_sk] #9 + CometHashAggregate [cs_call_center_sk,sum,sum,cs_ext_sales_price,cs_net_profit] + CometProject [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + CometBroadcastHashJoin [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + ReusedExchange [d_date_sk] #4 + ColumnarToRow + InputAdapter + CometHashAggregate [returns,profit_loss,sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] + CometExchange #10 + CometHashAggregate [sum,sum,cr_return_amount,cr_net_loss] + CometProject [cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cr_return_amount,cr_net_loss,cr_returned_date_sk,d_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (4) + ColumnarToRow + InputAdapter + CometProject [wp_web_page_sk,returns,profit,profit_loss] [channel,id,sales,returns,profit] + CometBroadcastHashJoin [wp_web_page_sk,sales,profit,wp_web_page_sk,returns,profit_loss] + CometHashAggregate [wp_web_page_sk,sales,profit,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + CometExchange [wp_web_page_sk] #11 + CometHashAggregate [wp_web_page_sk,sum,sum,ws_ext_sales_price,ws_net_profit] + CometProject [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + CometBroadcastHashJoin [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + CometProject [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + CometBroadcastHashJoin [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,d_date_sk] + CometFilter [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + ReusedExchange [d_date_sk] #4 + CometBroadcastExchange [wp_web_page_sk] #12 + CometFilter [wp_web_page_sk] + CometScan parquet spark_catalog.default.web_page [wp_web_page_sk] + CometBroadcastExchange [wp_web_page_sk,returns,profit_loss] #13 + CometHashAggregate [wp_web_page_sk,returns,profit_loss,sum,sum,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss))] + CometExchange [wp_web_page_sk] #14 + CometHashAggregate [wp_web_page_sk,sum,sum,wr_return_amt,wr_net_loss] + CometProject [wr_return_amt,wr_net_loss,wp_web_page_sk] + CometBroadcastHashJoin [wr_web_page_sk,wr_return_amt,wr_net_loss,wp_web_page_sk] + CometProject [wr_web_page_sk,wr_return_amt,wr_net_loss] + CometBroadcastHashJoin [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk,d_date_sk] + CometFilter [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + ReusedExchange [d_date_sk] #4 + ReusedExchange [wp_web_page_sk] #12 + WholeStageCodegen (13) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (12) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (20) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #16 + WholeStageCodegen (19) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/explain.txt new file mode 100644 index 0000000000..03ce3dd98b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/explain.txt @@ -0,0 +1,250 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * ColumnarToRow (46) + +- CometSortMergeJoin (45) + :- CometProject (27) + : +- CometSortMergeJoin (26) + : :- CometSort (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometFilter (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (13) + : +- CometSort (25) + : +- CometFilter (24) + : +- CometHashAggregate (23) + : +- ReusedExchange (22) + +- CometSort (44) + +- CometFilter (43) + +- CometHashAggregate (42) + +- CometExchange (41) + +- CometHashAggregate (40) + +- CometProject (39) + +- CometBroadcastHashJoin (38) + :- CometProject (36) + : +- CometFilter (35) + : +- CometSortMergeJoin (34) + : :- CometSort (31) + : : +- CometExchange (30) + : : +- CometFilter (29) + : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (28) + : +- CometSort (33) + : +- ReusedExchange (32) + +- ReusedExchange (37) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(6) CometFilter +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) CometProject +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_item_sk#8, sr_ticket_number#9] + +(8) CometExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_ticket_number#3, ss_item_sk#1], [sr_ticket_number#9, sr_item_sk#8], LeftOuter + +(11) CometFilter +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(12) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(13) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(14) CometFilter +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(15) CometBroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(16) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#11, d_year#12] +Arguments: [ss_sold_date_sk#7], [d_date_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] + +(18) CometHashAggregate +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometHashAggregate +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] + +(21) CometSort +Input [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19], [ss_sold_year#16 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST] + +(22) ReusedExchange [Reuses operator id: 19] +Output [6]: [d_year#20, ws_item_sk#21, ws_bill_customer_sk#22, sum#23, sum#24, sum#25] + +(23) CometHashAggregate +Input [6]: [d_year#20, ws_item_sk#21, ws_bill_customer_sk#22, sum#23, sum#24, sum#25] +Keys [3]: [d_year#20, ws_item_sk#21, ws_bill_customer_sk#22] +Functions [3]: [sum(ws_quantity#26), sum(UnscaledValue(ws_wholesale_cost#27)), sum(UnscaledValue(ws_sales_price#28))] + +(24) CometFilter +Input [6]: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Condition : (coalesce(ws_qty#31, 0) > 0) + +(25) CometSort +Input [6]: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Arguments: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33], [ws_sold_year#29 ASC NULLS FIRST, ws_item_sk#21 ASC NULLS FIRST, ws_customer_sk#30 ASC NULLS FIRST] + +(26) CometSortMergeJoin +Left output [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Right output [6]: [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30], Inner + +(27) CometProject +Input [12]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_sold_year#29, ws_item_sk#21, ws_customer_sk#30, ws_qty#31, ws_wc#32, ws_sp#33] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33], [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33] + +(28) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] + +(29) CometFilter +Input [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Condition : (isnotnull(cs_item_sk#35) AND isnotnull(cs_bill_customer_sk#34)) + +(30) CometExchange +Input [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Arguments: hashpartitioning(cs_order_number#36, cs_item_sk#35, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(31) CometSort +Input [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40], [cs_order_number#36 ASC NULLS FIRST, cs_item_sk#35 ASC NULLS FIRST] + +(32) ReusedExchange [Reuses operator id: 8] +Output [2]: [cr_item_sk#41, cr_order_number#42] + +(33) CometSort +Input [2]: [cr_item_sk#41, cr_order_number#42] +Arguments: [cr_item_sk#41, cr_order_number#42], [cr_order_number#42 ASC NULLS FIRST, cr_item_sk#41 ASC NULLS FIRST] + +(34) CometSortMergeJoin +Left output [7]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Right output [2]: [cr_item_sk#41, cr_order_number#42] +Arguments: [cs_order_number#36, cs_item_sk#35], [cr_order_number#42, cr_item_sk#41], LeftOuter + +(35) CometFilter +Input [9]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42] +Condition : isnull(cr_order_number#42) + +(36) CometProject +Input [9]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_order_number#36, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40], [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] + +(37) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#43, d_year#44] + +(38) CometBroadcastHashJoin +Left output [6]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40] +Right output [2]: [d_date_sk#43, d_year#44] +Arguments: [cs_sold_date_sk#40], [d_date_sk#43], Inner, BuildRight + +(39) CometProject +Input [8]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, cs_sold_date_sk#40, d_date_sk#43, d_year#44] +Arguments: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, d_year#44], [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, d_year#44] + +(40) CometHashAggregate +Input [6]: [cs_bill_customer_sk#34, cs_item_sk#35, cs_quantity#37, cs_wholesale_cost#38, cs_sales_price#39, d_year#44] +Keys [3]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34] +Functions [3]: [partial_sum(cs_quantity#37), partial_sum(UnscaledValue(cs_wholesale_cost#38)), partial_sum(UnscaledValue(cs_sales_price#39))] + +(41) CometExchange +Input [6]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34, sum#45, sum#46, sum#47] +Arguments: hashpartitioning(d_year#44, cs_item_sk#35, cs_bill_customer_sk#34, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(42) CometHashAggregate +Input [6]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34, sum#45, sum#46, sum#47] +Keys [3]: [d_year#44, cs_item_sk#35, cs_bill_customer_sk#34] +Functions [3]: [sum(cs_quantity#37), sum(UnscaledValue(cs_wholesale_cost#38)), sum(UnscaledValue(cs_sales_price#39))] + +(43) CometFilter +Input [6]: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] +Condition : (coalesce(cs_qty#50, 0) > 0) + +(44) CometSort +Input [6]: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] +Arguments: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52], [cs_sold_year#48 ASC NULLS FIRST, cs_item_sk#35 ASC NULLS FIRST, cs_customer_sk#49 ASC NULLS FIRST] + +(45) CometSortMergeJoin +Left output [9]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33] +Right output [6]: [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49], Inner + +(46) ColumnarToRow [codegen id : 1] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33, cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] + +(47) Project [codegen id : 1] +Output [13]: [round((cast(ss_qty#17 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(coalesce((ws_qty#31 + cs_qty#50), 1) as double)))), 2) AS ratio#53, ss_qty#17 AS store_qty#54, ss_wc#18 AS store_wholesale_cost#55, ss_sp#19 AS store_sales_price#56, (coalesce(ws_qty#31, 0) + coalesce(cs_qty#50, 0)) AS other_chan_qty#57, (coalesce(ws_wc#32, 0.00) + coalesce(cs_wc#51, 0.00)) AS other_chan_wholesale_cost#58, (coalesce(ws_sp#33, 0.00) + coalesce(cs_sp#52, 0.00)) AS other_chan_sales_price#59, ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#31, ws_wc#32, ws_sp#33, cs_sold_year#48, cs_item_sk#35, cs_customer_sk#49, cs_qty#50, cs_wc#51, cs_sp#52] + +(48) TakeOrderedAndProject +Input [13]: [ratio#53, store_qty#54, store_wholesale_cost#55, store_sales_price#56, other_chan_qty#57, other_chan_wholesale_cost#58, other_chan_sales_price#59, ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Arguments: 100, [ss_sold_year#16 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#17 DESC NULLS LAST, ss_wc#18 DESC NULLS LAST, ss_sp#19 DESC NULLS LAST, other_chan_qty#57 ASC NULLS FIRST, other_chan_wholesale_cost#58 ASC NULLS FIRST, other_chan_sales_price#59 ASC NULLS FIRST, ratio#53 ASC NULLS FIRST], [ratio#53, store_qty#54, store_wholesale_cost#55, store_sales_price#56, other_chan_qty#57, other_chan_wholesale_cost#58, other_chan_sales_price#59] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ebcb3c8230 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_datafusion/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ratio,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (1) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp,ss_sold_year,ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp,cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometSort [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp] + CometHashAggregate [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,d_year,sum,sum,sum,sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price))] + CometExchange [d_year,ss_item_sk,ss_customer_sk] #1 + CometHashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum,ss_quantity,ss_wholesale_cost,ss_sales_price] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometExchange [ss_ticket_number,ss_item_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #3 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #4 + CometFilter [d_date_sk,d_year] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_year] + CometSort [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometFilter [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometHashAggregate [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp,d_year,ws_bill_customer_sk,sum,sum,sum,sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price))] + ReusedExchange [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] #1 + CometSort [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometFilter [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometHashAggregate [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp,d_year,cs_bill_customer_sk,sum,sum,sum,sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price))] + CometExchange [d_year,cs_item_sk,cs_bill_customer_sk] #5 + CometHashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum,cs_quantity,cs_wholesale_cost,cs_sales_price] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSortMergeJoin [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSort [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #6 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number] + ReusedExchange [cr_item_sk,cr_order_number] #3 + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..39968ffc00 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/explain.txt @@ -0,0 +1,373 @@ +== Physical Plan == +TakeOrderedAndProject (67) ++- * Project (66) + +- * ColumnarToRow (65) + +- CometSortMergeJoin (64) + :- CometProject (43) + : +- CometSortMergeJoin (42) + : :- CometSort (21) + : : +- CometHashAggregate (20) + : : +- CometExchange (19) + : : +- CometHashAggregate (18) + : : +- CometProject (17) + : : +- CometBroadcastHashJoin (16) + : : :- CometProject (12) + : : : +- CometFilter (11) + : : : +- CometSortMergeJoin (10) + : : : :- CometSort (4) + : : : : +- CometExchange (3) + : : : : +- CometFilter (2) + : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : +- CometSort (9) + : : : +- CometExchange (8) + : : : +- CometProject (7) + : : : +- CometFilter (6) + : : : +- CometScan parquet spark_catalog.default.store_returns (5) + : : +- CometBroadcastExchange (15) + : : +- CometFilter (14) + : : +- CometScan parquet spark_catalog.default.date_dim (13) + : +- CometSort (41) + : +- CometFilter (40) + : +- CometHashAggregate (39) + : +- CometExchange (38) + : +- CometHashAggregate (37) + : +- CometProject (36) + : +- CometBroadcastHashJoin (35) + : :- CometProject (33) + : : +- CometFilter (32) + : : +- CometSortMergeJoin (31) + : : :- CometSort (25) + : : : +- CometExchange (24) + : : : +- CometFilter (23) + : : : +- CometScan parquet spark_catalog.default.web_sales (22) + : : +- CometSort (30) + : : +- CometExchange (29) + : : +- CometProject (28) + : : +- CometFilter (27) + : : +- CometScan parquet spark_catalog.default.web_returns (26) + : +- ReusedExchange (34) + +- CometSort (63) + +- CometFilter (62) + +- CometHashAggregate (61) + +- CometExchange (60) + +- CometHashAggregate (59) + +- CometProject (58) + +- CometBroadcastHashJoin (57) + :- CometProject (55) + : +- CometFilter (54) + : +- CometSortMergeJoin (53) + : :- CometSort (47) + : : +- CometExchange (46) + : : +- CometFilter (45) + : : +- CometScan parquet spark_catalog.default.catalog_sales (44) + : +- CometSort (52) + : +- CometExchange (51) + : +- CometProject (50) + : +- CometFilter (49) + : +- CometScan parquet spark_catalog.default.catalog_returns (48) + +- ReusedExchange (56) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST] + +(5) CometScan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(6) CometFilter +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) CometProject +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_item_sk#8, sr_ticket_number#9] + +(8) CometExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_item_sk#8, sr_ticket_number#9], [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_ticket_number#3, ss_item_sk#1], [sr_ticket_number#9, sr_item_sk#8], LeftOuter + +(11) CometFilter +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(12) CometProject +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(13) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(14) CometFilter +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(15) CometBroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: [d_date_sk#11, d_year#12] + +(16) CometBroadcastHashJoin +Left output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Right output [2]: [d_date_sk#11, d_year#12] +Arguments: [ss_sold_date_sk#7], [d_date_sk#11], Inner, BuildRight + +(17) CometProject +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] +Arguments: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12], [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] + +(18) CometHashAggregate +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] + +(19) CometExchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(20) CometHashAggregate +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#13, sum#14, sum#15] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] + +(21) CometSort +Input [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19], [ss_sold_year#16 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST] + +(22) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#26)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(23) CometFilter +Input [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Condition : (isnotnull(ws_item_sk#20) AND isnotnull(ws_bill_customer_sk#21)) + +(24) CometExchange +Input [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Arguments: hashpartitioning(ws_order_number#22, ws_item_sk#20, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(25) CometSort +Input [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Arguments: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26], [ws_order_number#22 ASC NULLS FIRST, ws_item_sk#20 ASC NULLS FIRST] + +(26) CometScan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#27, wr_order_number#28, wr_returned_date_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(27) CometFilter +Input [3]: [wr_item_sk#27, wr_order_number#28, wr_returned_date_sk#29] +Condition : (isnotnull(wr_order_number#28) AND isnotnull(wr_item_sk#27)) + +(28) CometProject +Input [3]: [wr_item_sk#27, wr_order_number#28, wr_returned_date_sk#29] +Arguments: [wr_item_sk#27, wr_order_number#28], [wr_item_sk#27, wr_order_number#28] + +(29) CometExchange +Input [2]: [wr_item_sk#27, wr_order_number#28] +Arguments: hashpartitioning(wr_order_number#28, wr_item_sk#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(30) CometSort +Input [2]: [wr_item_sk#27, wr_order_number#28] +Arguments: [wr_item_sk#27, wr_order_number#28], [wr_order_number#28 ASC NULLS FIRST, wr_item_sk#27 ASC NULLS FIRST] + +(31) CometSortMergeJoin +Left output [7]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Right output [2]: [wr_item_sk#27, wr_order_number#28] +Arguments: [ws_order_number#22, ws_item_sk#20], [wr_order_number#28, wr_item_sk#27], LeftOuter + +(32) CometFilter +Input [9]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26, wr_item_sk#27, wr_order_number#28] +Condition : isnull(wr_order_number#28) + +(33) CometProject +Input [9]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_order_number#22, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26, wr_item_sk#27, wr_order_number#28] +Arguments: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26], [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] + +(34) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#30, d_year#31] + +(35) CometBroadcastHashJoin +Left output [6]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26] +Right output [2]: [d_date_sk#30, d_year#31] +Arguments: [ws_sold_date_sk#26], [d_date_sk#30], Inner, BuildRight + +(36) CometProject +Input [8]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, ws_sold_date_sk#26, d_date_sk#30, d_year#31] +Arguments: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, d_year#31], [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, d_year#31] + +(37) CometHashAggregate +Input [6]: [ws_item_sk#20, ws_bill_customer_sk#21, ws_quantity#23, ws_wholesale_cost#24, ws_sales_price#25, d_year#31] +Keys [3]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21] +Functions [3]: [partial_sum(ws_quantity#23), partial_sum(UnscaledValue(ws_wholesale_cost#24)), partial_sum(UnscaledValue(ws_sales_price#25))] + +(38) CometExchange +Input [6]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(d_year#31, ws_item_sk#20, ws_bill_customer_sk#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(39) CometHashAggregate +Input [6]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21, sum#32, sum#33, sum#34] +Keys [3]: [d_year#31, ws_item_sk#20, ws_bill_customer_sk#21] +Functions [3]: [sum(ws_quantity#23), sum(UnscaledValue(ws_wholesale_cost#24)), sum(UnscaledValue(ws_sales_price#25))] + +(40) CometFilter +Input [6]: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Condition : (coalesce(ws_qty#37, 0) > 0) + +(41) CometSort +Input [6]: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Arguments: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39], [ws_sold_year#35 ASC NULLS FIRST, ws_item_sk#20 ASC NULLS FIRST, ws_customer_sk#36 ASC NULLS FIRST] + +(42) CometSortMergeJoin +Left output [6]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Right output [6]: [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36], Inner + +(43) CometProject +Input [12]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_sold_year#35, ws_item_sk#20, ws_customer_sk#36, ws_qty#37, ws_wc#38, ws_sp#39] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39], [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39] + +(44) CometScan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#46)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(45) CometFilter +Input [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Condition : (isnotnull(cs_item_sk#41) AND isnotnull(cs_bill_customer_sk#40)) + +(46) CometExchange +Input [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Arguments: hashpartitioning(cs_order_number#42, cs_item_sk#41, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(47) CometSort +Input [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Arguments: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46], [cs_order_number#42 ASC NULLS FIRST, cs_item_sk#41 ASC NULLS FIRST] + +(48) CometScan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#47, cr_order_number#48, cr_returned_date_sk#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(49) CometFilter +Input [3]: [cr_item_sk#47, cr_order_number#48, cr_returned_date_sk#49] +Condition : (isnotnull(cr_order_number#48) AND isnotnull(cr_item_sk#47)) + +(50) CometProject +Input [3]: [cr_item_sk#47, cr_order_number#48, cr_returned_date_sk#49] +Arguments: [cr_item_sk#47, cr_order_number#48], [cr_item_sk#47, cr_order_number#48] + +(51) CometExchange +Input [2]: [cr_item_sk#47, cr_order_number#48] +Arguments: hashpartitioning(cr_order_number#48, cr_item_sk#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(52) CometSort +Input [2]: [cr_item_sk#47, cr_order_number#48] +Arguments: [cr_item_sk#47, cr_order_number#48], [cr_order_number#48 ASC NULLS FIRST, cr_item_sk#47 ASC NULLS FIRST] + +(53) CometSortMergeJoin +Left output [7]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Right output [2]: [cr_item_sk#47, cr_order_number#48] +Arguments: [cs_order_number#42, cs_item_sk#41], [cr_order_number#48, cr_item_sk#47], LeftOuter + +(54) CometFilter +Input [9]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46, cr_item_sk#47, cr_order_number#48] +Condition : isnull(cr_order_number#48) + +(55) CometProject +Input [9]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46, cr_item_sk#47, cr_order_number#48] +Arguments: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46], [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] + +(56) ReusedExchange [Reuses operator id: 15] +Output [2]: [d_date_sk#50, d_year#51] + +(57) CometBroadcastHashJoin +Left output [6]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46] +Right output [2]: [d_date_sk#50, d_year#51] +Arguments: [cs_sold_date_sk#46], [d_date_sk#50], Inner, BuildRight + +(58) CometProject +Input [8]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, cs_sold_date_sk#46, d_date_sk#50, d_year#51] +Arguments: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, d_year#51], [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, d_year#51] + +(59) CometHashAggregate +Input [6]: [cs_bill_customer_sk#40, cs_item_sk#41, cs_quantity#43, cs_wholesale_cost#44, cs_sales_price#45, d_year#51] +Keys [3]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40] +Functions [3]: [partial_sum(cs_quantity#43), partial_sum(UnscaledValue(cs_wholesale_cost#44)), partial_sum(UnscaledValue(cs_sales_price#45))] + +(60) CometExchange +Input [6]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40, sum#52, sum#53, sum#54] +Arguments: hashpartitioning(d_year#51, cs_item_sk#41, cs_bill_customer_sk#40, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(61) CometHashAggregate +Input [6]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40, sum#52, sum#53, sum#54] +Keys [3]: [d_year#51, cs_item_sk#41, cs_bill_customer_sk#40] +Functions [3]: [sum(cs_quantity#43), sum(UnscaledValue(cs_wholesale_cost#44)), sum(UnscaledValue(cs_sales_price#45))] + +(62) CometFilter +Input [6]: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] +Condition : (coalesce(cs_qty#57, 0) > 0) + +(63) CometSort +Input [6]: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] +Arguments: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59], [cs_sold_year#55 ASC NULLS FIRST, cs_item_sk#41 ASC NULLS FIRST, cs_customer_sk#56 ASC NULLS FIRST] + +(64) CometSortMergeJoin +Left output [9]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39] +Right output [6]: [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] +Arguments: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2], [cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56], Inner + +(65) ColumnarToRow [codegen id : 1] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39, cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] + +(66) Project [codegen id : 1] +Output [13]: [round((cast(ss_qty#17 as double) / knownfloatingpointnormalized(normalizenanandzero(cast(coalesce((ws_qty#37 + cs_qty#57), 1) as double)))), 2) AS ratio#60, ss_qty#17 AS store_qty#61, ss_wc#18 AS store_wholesale_cost#62, ss_sp#19 AS store_sales_price#63, (coalesce(ws_qty#37, 0) + coalesce(cs_qty#57, 0)) AS other_chan_qty#64, (coalesce(ws_wc#38, 0.00) + coalesce(cs_wc#58, 0.00)) AS other_chan_wholesale_cost#65, (coalesce(ws_sp#39, 0.00) + coalesce(cs_sp#59, 0.00)) AS other_chan_sales_price#66, ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Input [15]: [ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19, ws_qty#37, ws_wc#38, ws_sp#39, cs_sold_year#55, cs_item_sk#41, cs_customer_sk#56, cs_qty#57, cs_wc#58, cs_sp#59] + +(67) TakeOrderedAndProject +Input [13]: [ratio#60, store_qty#61, store_wholesale_cost#62, store_sales_price#63, other_chan_qty#64, other_chan_wholesale_cost#65, other_chan_sales_price#66, ss_sold_year#16, ss_item_sk#1, ss_customer_sk#2, ss_qty#17, ss_wc#18, ss_sp#19] +Arguments: 100, [ss_sold_year#16 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#17 DESC NULLS LAST, ss_wc#18 DESC NULLS LAST, ss_sp#19 DESC NULLS LAST, other_chan_qty#64 ASC NULLS FIRST, other_chan_wholesale_cost#65 ASC NULLS FIRST, other_chan_sales_price#66 ASC NULLS FIRST, ratio#60 ASC NULLS FIRST], [ratio#60, store_qty#61, store_wholesale_cost#62, store_sales_price#63, other_chan_qty#64, other_chan_wholesale_cost#65, other_chan_sales_price#66] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..bde5d3bf4e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q78.native_iceberg_compat/simplified.txt @@ -0,0 +1,69 @@ +TakeOrderedAndProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ratio,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (1) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp,ss_sold_year,ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp,cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + CometSortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometSort [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp] + CometHashAggregate [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,d_year,sum,sum,sum,sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price))] + CometExchange [d_year,ss_item_sk,ss_customer_sk] #1 + CometHashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum,ss_quantity,ss_wholesale_cost,ss_sales_price] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + CometBroadcastHashJoin [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,d_date_sk,d_year] + CometProject [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSortMergeJoin [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk,sr_item_sk,sr_ticket_number] + CometSort [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometExchange [ss_ticket_number,ss_item_sk] #2 + CometFilter [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number] + CometExchange [sr_ticket_number,sr_item_sk] #3 + CometProject [sr_item_sk,sr_ticket_number] + CometFilter [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk,d_year] #4 + CometFilter [d_date_sk,d_year] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + CometSort [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometFilter [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp] + CometHashAggregate [ws_sold_year,ws_item_sk,ws_customer_sk,ws_qty,ws_wc,ws_sp,d_year,ws_bill_customer_sk,sum,sum,sum,sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price))] + CometExchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + CometHashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum,ws_quantity,ws_wholesale_cost,ws_sales_price] + CometProject [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + CometBroadcastHashJoin [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk,d_date_sk,d_year] + CometProject [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk,wr_item_sk,wr_order_number] + CometSortMergeJoin [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk,wr_item_sk,wr_order_number] + CometSort [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometExchange [ws_order_number,ws_item_sk] #6 + CometFilter [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + CometSort [wr_item_sk,wr_order_number] + CometExchange [wr_order_number,wr_item_sk] #7 + CometProject [wr_item_sk,wr_order_number] + CometFilter [wr_item_sk,wr_order_number,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_returned_date_sk] + ReusedExchange [d_date_sk,d_year] #4 + CometSort [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometFilter [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp] + CometHashAggregate [cs_sold_year,cs_item_sk,cs_customer_sk,cs_qty,cs_wc,cs_sp,d_year,cs_bill_customer_sk,sum,sum,sum,sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price))] + CometExchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + CometHashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum,cs_quantity,cs_wholesale_cost,cs_sales_price] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + CometBroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,d_date_sk,d_year] + CometProject [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSortMergeJoin [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk,cr_item_sk,cr_order_number] + CometSort [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometExchange [cs_order_number,cs_item_sk] #9 + CometFilter [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number] + CometExchange [cr_order_number,cr_item_sk] #10 + CometProject [cr_item_sk,cr_order_number] + CometFilter [cr_item_sk,cr_order_number,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/explain.txt new file mode 100644 index 0000000000..6f7632050f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/explain.txt @@ -0,0 +1,459 @@ +== Physical Plan == +TakeOrderedAndProject (84) ++- * HashAggregate (83) + +- Exchange (82) + +- * HashAggregate (81) + +- Union (80) + :- * HashAggregate (69) + : +- Exchange (68) + : +- * HashAggregate (67) + : +- Union (66) + : :- * HashAggregate (38) + : : +- * ColumnarToRow (37) + : : +- CometExchange (36) + : : +- CometHashAggregate (35) + : : +- CometProject (34) + : : +- CometBroadcastHashJoin (33) + : : :- CometProject (28) + : : : +- CometBroadcastHashJoin (27) + : : : :- CometProject (22) + : : : : +- CometBroadcastHashJoin (21) + : : : : :- CometProject (17) + : : : : : +- CometBroadcastHashJoin (16) + : : : : : :- CometProject (11) + : : : : : : +- CometSortMergeJoin (10) + : : : : : : :- CometSort (4) + : : : : : : : +- CometExchange (3) + : : : : : : : +- CometFilter (2) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : : : : : : +- CometSort (9) + : : : : : : +- CometExchange (8) + : : : : : : +- CometProject (7) + : : : : : : +- CometFilter (6) + : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`store_returns` (5) + : : : : : +- CometBroadcastExchange (15) + : : : : : +- CometProject (14) + : : : : : +- CometFilter (13) + : : : : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (12) + : : : : +- CometBroadcastExchange (20) + : : : : +- CometFilter (19) + : : : : +- CometNativeScan: `spark_catalog`.`default`.`store` (18) + : : : +- CometBroadcastExchange (26) + : : : +- CometProject (25) + : : : +- CometFilter (24) + : : : +- CometNativeScan: `spark_catalog`.`default`.`item` (23) + : : +- CometBroadcastExchange (32) + : : +- CometProject (31) + : : +- CometFilter (30) + : : +- CometNativeScan: `spark_catalog`.`default`.`promotion` (29) + : :- * HashAggregate (62) + : : +- * ColumnarToRow (61) + : : +- CometExchange (60) + : : +- CometHashAggregate (59) + : : +- CometProject (58) + : : +- CometBroadcastHashJoin (57) + : : :- CometProject (55) + : : : +- CometBroadcastHashJoin (54) + : : : :- CometProject (52) + : : : : +- CometBroadcastHashJoin (51) + : : : : :- CometProject (49) + : : : : : +- CometBroadcastHashJoin (48) + : : : : : :- CometProject (46) + : : : : : : +- CometSortMergeJoin (45) + : : : : : : :- CometSort (42) + : : : : : : : +- CometExchange (41) + : : : : : : : +- CometFilter (40) + : : : : : : : +- CometNativeScan: `spark_catalog`.`default`.`catalog_sales` (39) + : : : : : : +- CometSort (44) + : : : : : : +- ReusedExchange (43) + : : : : : +- ReusedExchange (47) + : : : : +- ReusedExchange (50) + : : : +- ReusedExchange (53) + : : +- ReusedExchange (56) + : +- * HashAggregate (65) + : +- * ColumnarToRow (64) + : +- ReusedExchange (63) + :- * HashAggregate (74) + : +- Exchange (73) + : +- * HashAggregate (72) + : +- * HashAggregate (71) + : +- ReusedExchange (70) + +- * HashAggregate (79) + +- Exchange (78) + +- * HashAggregate (77) + +- * HashAggregate (76) + +- ReusedExchange (75) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST] + +(5) CometNativeScan: `spark_catalog`.`default`.`store_returns` +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(6) CometFilter +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) CometProject +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(8) CometExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_ticket_number#4], [sr_item_sk#8, sr_ticket_number#9], LeftOuter + +(11) CometProject +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] + +(12) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13, d_date#14] + +(13) CometFilter +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 1998-08-04)) AND (d_date#14 <= 1998-09-03)) AND isnotnull(d_date_sk#13)) + +(14) CometProject +Input [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(16) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#7], [d_date_sk#13], Inner, BuildRight + +(17) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] + +(18) CometNativeScan: `spark_catalog`.`default`.`store` +Output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(19) CometFilter +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(20) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(21) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Right output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [ss_store_sk#2], [s_store_sk#15], Inner, BuildRight + +(22) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(23) CometNativeScan: `spark_catalog`.`default`.`item` +Output [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17, i_current_price#18] + +(24) CometFilter +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(25) CometProject +Input [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17], [i_item_sk#17] + +(26) CometBroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: [i_item_sk#17] + +(27) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [i_item_sk#17] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] +Arguments: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(29) CometNativeScan: `spark_catalog`.`default`.`promotion` +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19, p_channel_tv#20] + +(30) CometFilter +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(31) CometProject +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19], [p_promo_sk#19] + +(32) CometBroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: [p_promo_sk#19] + +(33) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [p_promo_sk#19] +Arguments: [ss_promo_sk#3], [p_promo_sk#19], Inner, BuildRight + +(34) CometProject +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] +Arguments: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(35) CometHashAggregate +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] + +(36) CometExchange +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(37) ColumnarToRow [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] + +(38) HashAggregate [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#26, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28] +Results [5]: [store channel AS channel#29, concat(store, s_store_id#16) AS id#30, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#26,17,2) AS sales#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27 AS returns#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28 AS profit#33] + +(39) CometNativeScan: `spark_catalog`.`default`.`catalog_sales` +Output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] + +(40) CometFilter +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Condition : ((isnotnull(cs_catalog_page_sk#34) AND isnotnull(cs_item_sk#35)) AND isnotnull(cs_promo_sk#36)) + +(41) CometExchange +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: hashpartitioning(cs_item_sk#35, cs_order_number#37, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(42) CometSort +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40], [cs_item_sk#35 ASC NULLS FIRST, cs_order_number#37 ASC NULLS FIRST] + +(43) ReusedExchange [Reuses operator id: 8] +Output [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] + +(44) CometSort +Input [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44], [cr_item_sk#41 ASC NULLS FIRST, cr_order_number#42 ASC NULLS FIRST] + +(45) CometSortMergeJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Right output [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_item_sk#35, cs_order_number#37], [cr_item_sk#41, cr_order_number#42], LeftOuter + +(46) CometProject +Input [11]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] + +(47) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#45] + +(48) CometBroadcastHashJoin +Left output [8]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] +Right output [1]: [d_date_sk#45] +Arguments: [cs_sold_date_sk#40], [d_date_sk#45], Inner, BuildRight + +(49) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44, d_date_sk#45] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] + +(50) ReusedExchange [Reuses operator id: 20] +Output [2]: [cp_catalog_page_sk#46, cp_catalog_page_id#47] + +(51) CometBroadcastHashJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] +Right output [2]: [cp_catalog_page_sk#46, cp_catalog_page_id#47] +Arguments: [cs_catalog_page_sk#34], [cp_catalog_page_sk#46], Inner, BuildRight + +(52) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_sk#46, cp_catalog_page_id#47] +Arguments: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47], [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] + +(53) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#48] + +(54) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] +Right output [1]: [i_item_sk#48] +Arguments: [cs_item_sk#35], [i_item_sk#48], Inner, BuildRight + +(55) CometProject +Input [8]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47, i_item_sk#48] +Arguments: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47], [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] + +(56) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#49] + +(57) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] +Right output [1]: [p_promo_sk#49] +Arguments: [cs_promo_sk#36], [p_promo_sk#49], Inner, BuildRight + +(58) CometProject +Input [7]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47, p_promo_sk#49] +Arguments: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47], [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] + +(59) CometHashAggregate +Input [5]: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#47] +Keys [1]: [cp_catalog_page_id#47] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#38)), partial_sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] + +(60) CometExchange +Input [6]: [cp_catalog_page_id#47, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Arguments: hashpartitioning(cp_catalog_page_id#47, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(61) ColumnarToRow [codegen id : 2] +Input [6]: [cp_catalog_page_id#47, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] + +(62) HashAggregate [codegen id : 2] +Input [6]: [cp_catalog_page_id#47, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Keys [1]: [cp_catalog_page_id#47] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#38)), sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#38))#55, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#56, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#57] +Results [5]: [catalog channel AS channel#58, concat(catalog_page, cp_catalog_page_id#47) AS id#59, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#38))#55,17,2) AS sales#60, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#56 AS returns#61, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#57 AS profit#62] + +(63) ReusedExchange [Reuses operator id: 36] +Output [6]: [web_site_id#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(64) ColumnarToRow [codegen id : 3] +Input [6]: [web_site_id#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(65) HashAggregate [codegen id : 3] +Input [6]: [web_site_id#63, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [web_site_id#63] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(coalesce(cast(wr_return_amt#70 as decimal(12,2)), 0.00)), sum((ws_net_profit#71 - coalesce(cast(wr_net_loss#72 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#69))#73, sum(coalesce(cast(wr_return_amt#70 as decimal(12,2)), 0.00))#74, sum((ws_net_profit#71 - coalesce(cast(wr_net_loss#72 as decimal(12,2)), 0.00)))#75] +Results [5]: [web channel AS channel#76, concat(web_site, web_site_id#63) AS id#77, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#73,17,2) AS sales#78, sum(coalesce(cast(wr_return_amt#70 as decimal(12,2)), 0.00))#74 AS returns#79, sum((ws_net_profit#71 - coalesce(cast(wr_net_loss#72 as decimal(12,2)), 0.00)))#75 AS profit#80] + +(66) Union + +(67) HashAggregate [codegen id : 4] +Input [5]: [channel#29, id#30, sales#31, returns#32, profit#33] +Keys [2]: [channel#29, id#30] +Functions [3]: [partial_sum(sales#31), partial_sum(returns#32), partial_sum(profit#33)] +Aggregate Attributes [6]: [sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86] +Results [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] + +(68) Exchange +Input [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] +Arguments: hashpartitioning(channel#29, id#30, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(69) HashAggregate [codegen id : 5] +Input [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] +Keys [2]: [channel#29, id#30] +Functions [3]: [sum(sales#31), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#31)#93, sum(returns#32)#94, sum(profit#33)#95] +Results [5]: [channel#29, id#30, cast(sum(sales#31)#93 as decimal(37,2)) AS sales#96, cast(sum(returns#32)#94 as decimal(38,2)) AS returns#97, cast(sum(profit#33)#95 as decimal(38,2)) AS profit#98] + +(70) ReusedExchange [Reuses operator id: 68] +Output [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] + +(71) HashAggregate [codegen id : 10] +Input [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] +Keys [2]: [channel#29, id#30] +Functions [3]: [sum(sales#31), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#31)#93, sum(returns#32)#94, sum(profit#33)#95] +Results [4]: [channel#29, sum(sales#31)#93 AS sales#99, sum(returns#32)#94 AS returns#100, sum(profit#33)#95 AS profit#101] + +(72) HashAggregate [codegen id : 10] +Input [4]: [channel#29, sales#99, returns#100, profit#101] +Keys [1]: [channel#29] +Functions [3]: [partial_sum(sales#99), partial_sum(returns#100), partial_sum(profit#101)] +Aggregate Attributes [6]: [sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107] +Results [7]: [channel#29, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113] + +(73) Exchange +Input [7]: [channel#29, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Arguments: hashpartitioning(channel#29, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(74) HashAggregate [codegen id : 11] +Input [7]: [channel#29, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Keys [1]: [channel#29] +Functions [3]: [sum(sales#99), sum(returns#100), sum(profit#101)] +Aggregate Attributes [3]: [sum(sales#99)#114, sum(returns#100)#115, sum(profit#101)#116] +Results [5]: [channel#29, null AS id#117, sum(sales#99)#114 AS sales#118, sum(returns#100)#115 AS returns#119, sum(profit#101)#116 AS profit#120] + +(75) ReusedExchange [Reuses operator id: 68] +Output [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] + +(76) HashAggregate [codegen id : 16] +Input [8]: [channel#29, id#30, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92] +Keys [2]: [channel#29, id#30] +Functions [3]: [sum(sales#31), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#31)#93, sum(returns#32)#94, sum(profit#33)#95] +Results [3]: [sum(sales#31)#93 AS sales#121, sum(returns#32)#94 AS returns#122, sum(profit#33)#95 AS profit#123] + +(77) HashAggregate [codegen id : 16] +Input [3]: [sales#121, returns#122, profit#123] +Keys: [] +Functions [3]: [partial_sum(sales#121), partial_sum(returns#122), partial_sum(profit#123)] +Aggregate Attributes [6]: [sum#124, isEmpty#125, sum#126, isEmpty#127, sum#128, isEmpty#129] +Results [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] + +(78) Exchange +Input [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(79) HashAggregate [codegen id : 17] +Input [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Keys: [] +Functions [3]: [sum(sales#121), sum(returns#122), sum(profit#123)] +Aggregate Attributes [3]: [sum(sales#121)#136, sum(returns#122)#137, sum(profit#123)#138] +Results [5]: [null AS channel#139, null AS id#140, sum(sales#121)#136 AS sales#141, sum(returns#122)#137 AS returns#142, sum(profit#123)#138 AS profit#143] + +(80) Union + +(81) HashAggregate [codegen id : 18] +Input [5]: [channel#29, id#30, sales#96, returns#97, profit#98] +Keys [5]: [channel#29, id#30, sales#96, returns#97, profit#98] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#29, id#30, sales#96, returns#97, profit#98] + +(82) Exchange +Input [5]: [channel#29, id#30, sales#96, returns#97, profit#98] +Arguments: hashpartitioning(channel#29, id#30, sales#96, returns#97, profit#98, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(83) HashAggregate [codegen id : 19] +Input [5]: [channel#29, id#30, sales#96, returns#97, profit#98] +Keys [5]: [channel#29, id#30, sales#96, returns#97, profit#98] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#29, id#30, sales#96, returns#97, profit#98] + +(84) TakeOrderedAndProject +Input [5]: [channel#29, id#30, sales#96, returns#97, profit#98] +Arguments: 100, [channel#29 ASC NULLS FIRST, id#30 ASC NULLS FIRST], [channel#29, id#30, sales#96, returns#97, profit#98] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4f7de727bb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_datafusion/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (19) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (18) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (4) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #3 + CometHashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] + CometProject [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,p_promo_sk] + CometProject [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,i_item_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + CometSortMergeJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometSort [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometExchange [ss_item_sk,ss_ticket_number] #4 + CometFilter [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometExchange [sr_item_sk,sr_ticket_number] #5 + CometProject [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_returns` [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #7 + CometFilter [s_store_sk,s_store_id] + CometNativeScan: `spark_catalog`.`default`.`store` [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk] #8 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_current_price] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_current_price] + CometBroadcastExchange [p_promo_sk] #9 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_tv] + CometNativeScan: `spark_catalog`.`default`.`promotion` [p_promo_sk,p_channel_tv] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [cp_catalog_page_id] #10 + CometHashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] + CometProject [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,p_promo_sk] + CometProject [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,i_item_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss,d_date_sk] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + CometSortMergeJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometSort [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometExchange [cs_item_sk,cs_order_number] #11 + CometFilter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`catalog_sales` [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] #5 + ReusedExchange [d_date_sk] #6 + ReusedExchange [cp_catalog_page_sk,cp_catalog_page_id] #7 + ReusedExchange [i_item_sk] #8 + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + ReusedExchange [web_site_id,sum,sum,isEmpty,sum,isEmpty] #3 + WholeStageCodegen (11) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #12 + WholeStageCodegen (10) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #13 + WholeStageCodegen (16) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9049685e7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/explain.txt @@ -0,0 +1,659 @@ +== Physical Plan == +TakeOrderedAndProject (115) ++- * HashAggregate (114) + +- Exchange (113) + +- * HashAggregate (112) + +- Union (111) + :- * HashAggregate (100) + : +- Exchange (99) + : +- * HashAggregate (98) + : +- Union (97) + : :- * HashAggregate (38) + : : +- * ColumnarToRow (37) + : : +- CometExchange (36) + : : +- CometHashAggregate (35) + : : +- CometProject (34) + : : +- CometBroadcastHashJoin (33) + : : :- CometProject (28) + : : : +- CometBroadcastHashJoin (27) + : : : :- CometProject (22) + : : : : +- CometBroadcastHashJoin (21) + : : : : :- CometProject (17) + : : : : : +- CometBroadcastHashJoin (16) + : : : : : :- CometProject (11) + : : : : : : +- CometSortMergeJoin (10) + : : : : : : :- CometSort (4) + : : : : : : : +- CometExchange (3) + : : : : : : : +- CometFilter (2) + : : : : : : : +- CometScan parquet spark_catalog.default.store_sales (1) + : : : : : : +- CometSort (9) + : : : : : : +- CometExchange (8) + : : : : : : +- CometProject (7) + : : : : : : +- CometFilter (6) + : : : : : : +- CometScan parquet spark_catalog.default.store_returns (5) + : : : : : +- CometBroadcastExchange (15) + : : : : : +- CometProject (14) + : : : : : +- CometFilter (13) + : : : : : +- CometScan parquet spark_catalog.default.date_dim (12) + : : : : +- CometBroadcastExchange (20) + : : : : +- CometFilter (19) + : : : : +- CometScan parquet spark_catalog.default.store (18) + : : : +- CometBroadcastExchange (26) + : : : +- CometProject (25) + : : : +- CometFilter (24) + : : : +- CometScan parquet spark_catalog.default.item (23) + : : +- CometBroadcastExchange (32) + : : +- CometProject (31) + : : +- CometFilter (30) + : : +- CometScan parquet spark_catalog.default.promotion (29) + : :- * HashAggregate (67) + : : +- * ColumnarToRow (66) + : : +- CometExchange (65) + : : +- CometHashAggregate (64) + : : +- CometProject (63) + : : +- CometBroadcastHashJoin (62) + : : :- CometProject (60) + : : : +- CometBroadcastHashJoin (59) + : : : :- CometProject (57) + : : : : +- CometBroadcastHashJoin (56) + : : : : :- CometProject (52) + : : : : : +- CometBroadcastHashJoin (51) + : : : : : :- CometProject (49) + : : : : : : +- CometSortMergeJoin (48) + : : : : : : :- CometSort (42) + : : : : : : : +- CometExchange (41) + : : : : : : : +- CometFilter (40) + : : : : : : : +- CometScan parquet spark_catalog.default.catalog_sales (39) + : : : : : : +- CometSort (47) + : : : : : : +- CometExchange (46) + : : : : : : +- CometProject (45) + : : : : : : +- CometFilter (44) + : : : : : : +- CometScan parquet spark_catalog.default.catalog_returns (43) + : : : : : +- ReusedExchange (50) + : : : : +- CometBroadcastExchange (55) + : : : : +- CometFilter (54) + : : : : +- CometScan parquet spark_catalog.default.catalog_page (53) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- * HashAggregate (96) + : +- * ColumnarToRow (95) + : +- CometExchange (94) + : +- CometHashAggregate (93) + : +- CometProject (92) + : +- CometBroadcastHashJoin (91) + : :- CometProject (89) + : : +- CometBroadcastHashJoin (88) + : : :- CometProject (86) + : : : +- CometBroadcastHashJoin (85) + : : : :- CometProject (81) + : : : : +- CometBroadcastHashJoin (80) + : : : : :- CometProject (78) + : : : : : +- CometSortMergeJoin (77) + : : : : : :- CometSort (71) + : : : : : : +- CometExchange (70) + : : : : : : +- CometFilter (69) + : : : : : : +- CometScan parquet spark_catalog.default.web_sales (68) + : : : : : +- CometSort (76) + : : : : : +- CometExchange (75) + : : : : : +- CometProject (74) + : : : : : +- CometFilter (73) + : : : : : +- CometScan parquet spark_catalog.default.web_returns (72) + : : : : +- ReusedExchange (79) + : : : +- CometBroadcastExchange (84) + : : : +- CometFilter (83) + : : : +- CometScan parquet spark_catalog.default.web_site (82) + : : +- ReusedExchange (87) + : +- ReusedExchange (90) + :- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * HashAggregate (102) + : +- ReusedExchange (101) + +- * HashAggregate (110) + +- Exchange (109) + +- * HashAggregate (108) + +- * HashAggregate (107) + +- ReusedExchange (106) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) CometFilter +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(3) CometExchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(4) CometSort +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7], [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST] + +(5) CometScan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(6) CometFilter +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) CometProject +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(8) CometExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(9) CometSort +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11], [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST] + +(10) CometSortMergeJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Right output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_ticket_number#4], [sr_item_sk#8, sr_ticket_number#9], LeftOuter + +(11) CometProject +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] + +(12) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) CometFilter +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 1998-08-04)) AND (d_date#14 <= 1998-09-03)) AND isnotnull(d_date_sk#13)) + +(14) CometProject +Input [2]: [d_date_sk#13, d_date#14] +Arguments: [d_date_sk#13], [d_date_sk#13] + +(15) CometBroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: [d_date_sk#13] + +(16) CometBroadcastHashJoin +Left output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Right output [1]: [d_date_sk#13] +Arguments: [ss_sold_date_sk#7], [d_date_sk#13], Inner, BuildRight + +(17) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] +Arguments: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11], [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] + +(18) CometScan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_store_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) CometFilter +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(20) CometBroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: [s_store_sk#15, s_store_id#16] + +(21) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Right output [2]: [s_store_sk#15, s_store_id#16] +Arguments: [ss_store_sk#2], [s_store_sk#15], Inner, BuildRight + +(22) CometProject +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] +Arguments: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(23) CometScan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_current_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) CometFilter +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(25) CometProject +Input [2]: [i_item_sk#17, i_current_price#18] +Arguments: [i_item_sk#17], [i_item_sk#17] + +(26) CometBroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: [i_item_sk#17] + +(27) CometBroadcastHashJoin +Left output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [i_item_sk#17] +Arguments: [ss_item_sk#1], [i_item_sk#17], Inner, BuildRight + +(28) CometProject +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] +Arguments: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(29) CometScan parquet spark_catalog.default.promotion +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(30) CometFilter +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(31) CometProject +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Arguments: [p_promo_sk#19], [p_promo_sk#19] + +(32) CometBroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: [p_promo_sk#19] + +(33) CometBroadcastHashJoin +Left output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Right output [1]: [p_promo_sk#19] +Arguments: [ss_promo_sk#3], [p_promo_sk#19], Inner, BuildRight + +(34) CometProject +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] +Arguments: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16], [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] + +(35) CometHashAggregate +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] + +(36) CometExchange +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(37) ColumnarToRow [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] + +(38) HashAggregate [codegen id : 1] +Input [6]: [s_store_id#16, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#26, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28] +Results [5]: [store channel AS channel#29, concat(store, s_store_id#16) AS id#30, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#26,17,2) AS sales#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#27 AS returns#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#28 AS profit#33] + +(39) CometScan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#40)] +PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(40) CometFilter +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Condition : ((isnotnull(cs_catalog_page_sk#34) AND isnotnull(cs_item_sk#35)) AND isnotnull(cs_promo_sk#36)) + +(41) CometExchange +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: hashpartitioning(cs_item_sk#35, cs_order_number#37, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(42) CometSort +Input [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40], [cs_item_sk#35 ASC NULLS FIRST, cs_order_number#37 ASC NULLS FIRST] + +(43) CometScan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44, cr_returned_date_sk#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(44) CometFilter +Input [5]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44, cr_returned_date_sk#45] +Condition : (isnotnull(cr_item_sk#41) AND isnotnull(cr_order_number#42)) + +(45) CometProject +Input [5]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44, cr_returned_date_sk#45] +Arguments: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44], [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] + +(46) CometExchange +Input [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: hashpartitioning(cr_item_sk#41, cr_order_number#42, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(47) CometSort +Input [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44], [cr_item_sk#41 ASC NULLS FIRST, cr_order_number#42 ASC NULLS FIRST] + +(48) CometSortMergeJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40] +Right output [4]: [cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_item_sk#35, cs_order_number#37], [cr_item_sk#41, cr_order_number#42], LeftOuter + +(49) CometProject +Input [11]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_order_number#37, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_item_sk#41, cr_order_number#42, cr_return_amount#43, cr_net_loss#44] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] + +(50) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#46] + +(51) CometBroadcastHashJoin +Left output [8]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44] +Right output [1]: [d_date_sk#46] +Arguments: [cs_sold_date_sk#40], [d_date_sk#46], Inner, BuildRight + +(52) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cs_sold_date_sk#40, cr_return_amount#43, cr_net_loss#44, d_date_sk#46] +Arguments: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44], [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] + +(53) CometScan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(54) CometFilter +Input [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Condition : isnotnull(cp_catalog_page_sk#47) + +(55) CometBroadcastExchange +Input [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Arguments: [cp_catalog_page_sk#47, cp_catalog_page_id#48] + +(56) CometBroadcastHashJoin +Left output [7]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44] +Right output [2]: [cp_catalog_page_sk#47, cp_catalog_page_id#48] +Arguments: [cs_catalog_page_sk#34], [cp_catalog_page_sk#47], Inner, BuildRight + +(57) CometProject +Input [9]: [cs_catalog_page_sk#34, cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_sk#47, cp_catalog_page_id#48] +Arguments: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48], [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] + +(58) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#49] + +(59) CometBroadcastHashJoin +Left output [7]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] +Right output [1]: [i_item_sk#49] +Arguments: [cs_item_sk#35], [i_item_sk#49], Inner, BuildRight + +(60) CometProject +Input [8]: [cs_item_sk#35, cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48, i_item_sk#49] +Arguments: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48], [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] + +(61) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#50] + +(62) CometBroadcastHashJoin +Left output [6]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] +Right output [1]: [p_promo_sk#50] +Arguments: [cs_promo_sk#36], [p_promo_sk#50], Inner, BuildRight + +(63) CometProject +Input [7]: [cs_promo_sk#36, cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48, p_promo_sk#50] +Arguments: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48], [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] + +(64) CometHashAggregate +Input [5]: [cs_ext_sales_price#38, cs_net_profit#39, cr_return_amount#43, cr_net_loss#44, cp_catalog_page_id#48] +Keys [1]: [cp_catalog_page_id#48] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#38)), partial_sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] + +(65) CometExchange +Input [6]: [cp_catalog_page_id#48, sum#51, sum#52, isEmpty#53, sum#54, isEmpty#55] +Arguments: hashpartitioning(cp_catalog_page_id#48, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=6] + +(66) ColumnarToRow [codegen id : 2] +Input [6]: [cp_catalog_page_id#48, sum#51, sum#52, isEmpty#53, sum#54, isEmpty#55] + +(67) HashAggregate [codegen id : 2] +Input [6]: [cp_catalog_page_id#48, sum#51, sum#52, isEmpty#53, sum#54, isEmpty#55] +Keys [1]: [cp_catalog_page_id#48] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#38)), sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00)), sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#38))#56, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#57, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#58] +Results [5]: [catalog channel AS channel#59, concat(catalog_page, cp_catalog_page_id#48) AS id#60, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#38))#56,17,2) AS sales#61, sum(coalesce(cast(cr_return_amount#43 as decimal(12,2)), 0.00))#57 AS returns#62, sum((cs_net_profit#39 - coalesce(cast(cr_net_loss#44 as decimal(12,2)), 0.00)))#58 AS profit#63] + +(68) CometScan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#70)] +PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(69) CometFilter +Input [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Condition : ((isnotnull(ws_web_site_sk#65) AND isnotnull(ws_item_sk#64)) AND isnotnull(ws_promo_sk#66)) + +(70) CometExchange +Input [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Arguments: hashpartitioning(ws_item_sk#64, ws_order_number#67, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=7] + +(71) CometSort +Input [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Arguments: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70], [ws_item_sk#64 ASC NULLS FIRST, ws_order_number#67 ASC NULLS FIRST] + +(72) CometScan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74, wr_returned_date_sk#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(73) CometFilter +Input [5]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74, wr_returned_date_sk#75] +Condition : (isnotnull(wr_item_sk#71) AND isnotnull(wr_order_number#72)) + +(74) CometProject +Input [5]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74, wr_returned_date_sk#75] +Arguments: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74], [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] + +(75) CometExchange +Input [4]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: hashpartitioning(wr_item_sk#71, wr_order_number#72, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=8] + +(76) CometSort +Input [4]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74], [wr_item_sk#71 ASC NULLS FIRST, wr_order_number#72 ASC NULLS FIRST] + +(77) CometSortMergeJoin +Left output [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70] +Right output [4]: [wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: [ws_item_sk#64, ws_order_number#67], [wr_item_sk#71, wr_order_number#72], LeftOuter + +(78) CometProject +Input [11]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_order_number#67, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_item_sk#71, wr_order_number#72, wr_return_amt#73, wr_net_loss#74] +Arguments: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74], [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74] + +(79) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#76] + +(80) CometBroadcastHashJoin +Left output [8]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74] +Right output [1]: [d_date_sk#76] +Arguments: [ws_sold_date_sk#70], [d_date_sk#76], Inner, BuildRight + +(81) CometProject +Input [9]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, ws_sold_date_sk#70, wr_return_amt#73, wr_net_loss#74, d_date_sk#76] +Arguments: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74], [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74] + +(82) CometScan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#77, web_site_id#78] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(83) CometFilter +Input [2]: [web_site_sk#77, web_site_id#78] +Condition : isnotnull(web_site_sk#77) + +(84) CometBroadcastExchange +Input [2]: [web_site_sk#77, web_site_id#78] +Arguments: [web_site_sk#77, web_site_id#78] + +(85) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74] +Right output [2]: [web_site_sk#77, web_site_id#78] +Arguments: [ws_web_site_sk#65], [web_site_sk#77], Inner, BuildRight + +(86) CometProject +Input [9]: [ws_item_sk#64, ws_web_site_sk#65, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_sk#77, web_site_id#78] +Arguments: [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78], [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] + +(87) ReusedExchange [Reuses operator id: 26] +Output [1]: [i_item_sk#79] + +(88) CometBroadcastHashJoin +Left output [7]: [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] +Right output [1]: [i_item_sk#79] +Arguments: [ws_item_sk#64], [i_item_sk#79], Inner, BuildRight + +(89) CometProject +Input [8]: [ws_item_sk#64, ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78, i_item_sk#79] +Arguments: [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78], [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] + +(90) ReusedExchange [Reuses operator id: 32] +Output [1]: [p_promo_sk#80] + +(91) CometBroadcastHashJoin +Left output [6]: [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] +Right output [1]: [p_promo_sk#80] +Arguments: [ws_promo_sk#66], [p_promo_sk#80], Inner, BuildRight + +(92) CometProject +Input [7]: [ws_promo_sk#66, ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78, p_promo_sk#80] +Arguments: [ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78], [ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] + +(93) CometHashAggregate +Input [5]: [ws_ext_sales_price#68, ws_net_profit#69, wr_return_amt#73, wr_net_loss#74, web_site_id#78] +Keys [1]: [web_site_id#78] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#68)), partial_sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00)), partial_sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))] + +(94) CometExchange +Input [6]: [web_site_id#78, sum#81, sum#82, isEmpty#83, sum#84, isEmpty#85] +Arguments: hashpartitioning(web_site_id#78, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=9] + +(95) ColumnarToRow [codegen id : 3] +Input [6]: [web_site_id#78, sum#81, sum#82, isEmpty#83, sum#84, isEmpty#85] + +(96) HashAggregate [codegen id : 3] +Input [6]: [web_site_id#78, sum#81, sum#82, isEmpty#83, sum#84, isEmpty#85] +Keys [1]: [web_site_id#78] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#68)), sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00)), sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#68))#86, sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00))#87, sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))#88] +Results [5]: [web channel AS channel#89, concat(web_site, web_site_id#78) AS id#90, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#68))#86,17,2) AS sales#91, sum(coalesce(cast(wr_return_amt#73 as decimal(12,2)), 0.00))#87 AS returns#92, sum((ws_net_profit#69 - coalesce(cast(wr_net_loss#74 as decimal(12,2)), 0.00)))#88 AS profit#93] + +(97) Union + +(98) HashAggregate [codegen id : 4] +Input [5]: [channel#29, id#30, sales#31, returns#32, profit#33] +Keys [2]: [channel#29, id#30] +Functions [3]: [partial_sum(sales#31), partial_sum(returns#32), partial_sum(profit#33)] +Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99] +Results [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(99) Exchange +Input [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(channel#29, id#30, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(100) HashAggregate [codegen id : 5] +Input [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#29, id#30] +Functions [3]: [sum(sales#31), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#31)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [5]: [channel#29, id#30, cast(sum(sales#31)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(38,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111] + +(101) ReusedExchange [Reuses operator id: 99] +Output [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(102) HashAggregate [codegen id : 10] +Input [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#29, id#30] +Functions [3]: [sum(sales#31), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#31)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [4]: [channel#29, sum(sales#31)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] + +(103) HashAggregate [codegen id : 10] +Input [4]: [channel#29, sales#112, returns#113, profit#114] +Keys [1]: [channel#29] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [7]: [channel#29, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(104) Exchange +Input [7]: [channel#29, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#29, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(105) HashAggregate [codegen id : 11] +Input [7]: [channel#29, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [1]: [channel#29] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#127, sum(returns#113)#128, sum(profit#114)#129] +Results [5]: [channel#29, null AS id#130, sum(sales#112)#127 AS sales#131, sum(returns#113)#128 AS returns#132, sum(profit#114)#129 AS profit#133] + +(106) ReusedExchange [Reuses operator id: 99] +Output [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(107) HashAggregate [codegen id : 16] +Input [8]: [channel#29, id#30, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#29, id#30] +Functions [3]: [sum(sales#31), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#31)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [3]: [sum(sales#31)#106 AS sales#134, sum(returns#32)#107 AS returns#135, sum(profit#33)#108 AS profit#136] + +(108) HashAggregate [codegen id : 16] +Input [3]: [sales#134, returns#135, profit#136] +Keys: [] +Functions [3]: [partial_sum(sales#134), partial_sum(returns#135), partial_sum(profit#136)] +Aggregate Attributes [6]: [sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] +Results [6]: [sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] + +(109) Exchange +Input [6]: [sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(110) HashAggregate [codegen id : 17] +Input [6]: [sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] +Keys: [] +Functions [3]: [sum(sales#134), sum(returns#135), sum(profit#136)] +Aggregate Attributes [3]: [sum(sales#134)#149, sum(returns#135)#150, sum(profit#136)#151] +Results [5]: [null AS channel#152, null AS id#153, sum(sales#134)#149 AS sales#154, sum(returns#135)#150 AS returns#155, sum(profit#136)#151 AS profit#156] + +(111) Union + +(112) HashAggregate [codegen id : 18] +Input [5]: [channel#29, id#30, sales#109, returns#110, profit#111] +Keys [5]: [channel#29, id#30, sales#109, returns#110, profit#111] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#29, id#30, sales#109, returns#110, profit#111] + +(113) Exchange +Input [5]: [channel#29, id#30, sales#109, returns#110, profit#111] +Arguments: hashpartitioning(channel#29, id#30, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(114) HashAggregate [codegen id : 19] +Input [5]: [channel#29, id#30, sales#109, returns#110, profit#111] +Keys [5]: [channel#29, id#30, sales#109, returns#110, profit#111] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#29, id#30, sales#109, returns#110, profit#111] + +(115) TakeOrderedAndProject +Input [5]: [channel#29, id#30, sales#109, returns#110, profit#111] +Arguments: 100, [channel#29 ASC NULLS FIRST, id#30 ASC NULLS FIRST], [channel#29, id#30, sales#109, returns#110, profit#111] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..854839605f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q80a.native_iceberg_compat/simplified.txt @@ -0,0 +1,137 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (19) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (18) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (4) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (1) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [s_store_id] #3 + CometHashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] + CometProject [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,p_promo_sk] + CometProject [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id,i_item_sk] + CometProject [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_sk,s_store_id] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + CometBroadcastHashJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss,d_date_sk] + CometProject [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + CometSortMergeJoin [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometSort [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometExchange [ss_item_sk,ss_ticket_number] #4 + CometFilter [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + CometSort [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometExchange [sr_item_sk,sr_ticket_number] #5 + CometProject [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + CometFilter [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometScan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + CometBroadcastExchange [d_date_sk] #6 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + CometBroadcastExchange [s_store_sk,s_store_id] #7 + CometFilter [s_store_sk,s_store_id] + CometScan parquet spark_catalog.default.store [s_store_sk,s_store_id] + CometBroadcastExchange [i_item_sk] #8 + CometProject [i_item_sk] + CometFilter [i_item_sk,i_current_price] + CometScan parquet spark_catalog.default.item [i_item_sk,i_current_price] + CometBroadcastExchange [p_promo_sk] #9 + CometProject [p_promo_sk] + CometFilter [p_promo_sk,p_channel_tv] + CometScan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (2) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [cp_catalog_page_id] #10 + CometHashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] + CometProject [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,p_promo_sk] + CometProject [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id,i_item_sk] + CometProject [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_sk,cp_catalog_page_id] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + CometBroadcastHashJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss,d_date_sk] + CometProject [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + CometSortMergeJoin [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometSort [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometExchange [cs_item_sk,cs_order_number] #11 + CometFilter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometScan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + CometSort [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometExchange [cr_item_sk,cr_order_number] #12 + CometProject [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + CometFilter [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + CometScan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + ReusedExchange [d_date_sk] #6 + CometBroadcastExchange [cp_catalog_page_sk,cp_catalog_page_id] #13 + CometFilter [cp_catalog_page_sk,cp_catalog_page_id] + CometScan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [i_item_sk] #8 + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (3) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + ColumnarToRow + InputAdapter + CometExchange [web_site_id] #14 + CometHashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] + CometProject [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + CometBroadcastHashJoin [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id,p_promo_sk] + CometProject [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + CometBroadcastHashJoin [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id,i_item_sk] + CometProject [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + CometBroadcastHashJoin [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_sk,web_site_id] + CometProject [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + CometBroadcastHashJoin [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss,d_date_sk] + CometProject [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss] + CometSortMergeJoin [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + CometSort [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometExchange [ws_item_sk,ws_order_number] #15 + CometFilter [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + CometSort [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + CometExchange [wr_item_sk,wr_order_number] #16 + CometProject [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + CometFilter [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + CometScan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + ReusedExchange [d_date_sk] #6 + CometBroadcastExchange [web_site_sk,web_site_id] #17 + CometFilter [web_site_sk,web_site_id] + CometScan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + ReusedExchange [i_item_sk] #8 + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (11) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #18 + WholeStageCodegen (10) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #19 + WholeStageCodegen (16) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/explain.txt new file mode 100644 index 0000000000..4fe52ded5c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/explain.txt @@ -0,0 +1,193 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * Project (35) + +- Window (34) + +- * ColumnarToRow (33) + +- CometSort (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometUnion (27) + :- CometHashAggregate (16) + : +- CometExchange (15) + : +- CometHashAggregate (14) + : +- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometNativeScan: `spark_catalog`.`default`.`web_sales` (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (9) + :- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometHashAggregate (18) + : +- ReusedExchange (17) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometHashAggregate (23) + +- ReusedExchange (22) + + +(1) CometNativeScan: `spark_catalog`.`default`.`web_sales` +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Arguments: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4, d_month_seq#5] + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] +Arguments: [ws_item_sk#1, ws_net_paid#2], [ws_item_sk#1, ws_net_paid#2] + +(9) CometNativeScan: `spark_catalog`.`default`.`item` +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(10) CometFilter +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#1, ws_net_paid#2] +Right output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_net_paid#2, i_class#7, i_category#8], [ws_net_paid#2, i_class#7, i_category#8] + +(14) CometHashAggregate +Input [3]: [ws_net_paid#2, i_class#7, i_category#8] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] + +(15) CometExchange +Input [3]: [i_category#8, i_class#7, sum#9] +Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [i_category#8, i_class#7, sum#9] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] + +(17) ReusedExchange [Reuses operator id: 15] +Output [3]: [i_category#10, i_class#11, sum#12] + +(18) CometHashAggregate +Input [3]: [i_category#10, i_class#11, sum#12] +Keys [2]: [i_category#10, i_class#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#13))] + +(19) CometHashAggregate +Input [2]: [total_sum#14, i_category#10] +Keys [1]: [i_category#10] +Functions [1]: [partial_sum(total_sum#14)] + +(20) CometExchange +Input [3]: [i_category#10, sum#15, isEmpty#16] +Arguments: hashpartitioning(i_category#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(21) CometHashAggregate +Input [3]: [i_category#10, sum#15, isEmpty#16] +Keys [1]: [i_category#10] +Functions [1]: [sum(total_sum#14)] + +(22) ReusedExchange [Reuses operator id: 15] +Output [3]: [i_category#17, i_class#18, sum#19] + +(23) CometHashAggregate +Input [3]: [i_category#17, i_class#18, sum#19] +Keys [2]: [i_category#17, i_class#18] +Functions [1]: [sum(UnscaledValue(ws_net_paid#20))] + +(24) CometHashAggregate +Input [1]: [total_sum#21] +Keys: [] +Functions [1]: [partial_sum(total_sum#21)] + +(25) CometExchange +Input [2]: [sum#22, isEmpty#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(26) CometHashAggregate +Input [2]: [sum#22, isEmpty#23] +Keys: [] +Functions [1]: [sum(total_sum#21)] + +(27) CometUnion +Child 0 Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Child 1 Input [6]: [total_sum#28, i_category#10, i_class#29, g_category#30, g_class#31, lochierarchy#32] +Child 2 Input [6]: [total_sum#33, i_category#34, i_class#35, g_category#36, g_class#37, lochierarchy#38] + +(28) CometHashAggregate +Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Keys [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Functions: [] + +(29) CometExchange +Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Arguments: hashpartitioning(total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(30) CometHashAggregate +Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Keys [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Functions: [] + +(31) CometExchange +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] +Arguments: hashpartitioning(lochierarchy#27, _w0#39, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(32) CometSort +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] +Arguments: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39], [lochierarchy#27 ASC NULLS FIRST, _w0#39 ASC NULLS FIRST, total_sum#24 DESC NULLS LAST] + +(33) ColumnarToRow [codegen id : 1] +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] + +(34) Window +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] +Arguments: [rank(total_sum#24) windowspecdefinition(lochierarchy#27, _w0#39, total_sum#24 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#40], [lochierarchy#27, _w0#39], [total_sum#24 DESC NULLS LAST] + +(35) Project [codegen id : 2] +Output [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, rank_within_parent#40] +Input [6]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39, rank_within_parent#40] + +(36) TakeOrderedAndProject +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, rank_within_parent#40] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#40 ASC NULLS FIRST], [total_sum#24, i_category#8, i_class#7, lochierarchy#27, rank_within_parent#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..5803acd613 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_datafusion/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (2) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [total_sum,i_category,i_class,lochierarchy,_w0] + CometExchange [lochierarchy,_w0] #1 + CometHashAggregate [total_sum,i_category,i_class,lochierarchy,_w0,g_category,g_class] + CometExchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + CometUnion [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [i_category,i_class] #3 + CometHashAggregate [i_category,i_class,sum,ws_net_paid] + CometProject [ws_net_paid,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,i_item_sk,i_class,i_category] + CometProject [ws_item_sk,ws_net_paid] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`web_sales` [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_class,i_category] #5 + CometFilter [i_item_sk,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_class,i_category] + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty,sum(total_sum)] + CometExchange [i_category] #6 + CometHashAggregate [i_category,sum,isEmpty,total_sum] + CometHashAggregate [total_sum,i_category,i_class,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [i_category,i_class,sum] #3 + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty,sum(total_sum)] + CometExchange #7 + CometHashAggregate [sum,isEmpty,total_sum] + CometHashAggregate [total_sum,i_category,i_class,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [i_category,i_class,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4d13fa7c7a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/explain.txt @@ -0,0 +1,203 @@ +== Physical Plan == +TakeOrderedAndProject (36) ++- * Project (35) + +- Window (34) + +- * ColumnarToRow (33) + +- CometSort (32) + +- CometExchange (31) + +- CometHashAggregate (30) + +- CometExchange (29) + +- CometHashAggregate (28) + +- CometUnion (27) + :- CometHashAggregate (16) + : +- CometExchange (15) + : +- CometHashAggregate (14) + : +- CometProject (13) + : +- CometBroadcastHashJoin (12) + : :- CometProject (8) + : : +- CometBroadcastHashJoin (7) + : : :- CometFilter (2) + : : : +- CometScan parquet spark_catalog.default.web_sales (1) + : : +- CometBroadcastExchange (6) + : : +- CometProject (5) + : : +- CometFilter (4) + : : +- CometScan parquet spark_catalog.default.date_dim (3) + : +- CometBroadcastExchange (11) + : +- CometFilter (10) + : +- CometScan parquet spark_catalog.default.item (9) + :- CometHashAggregate (21) + : +- CometExchange (20) + : +- CometHashAggregate (19) + : +- CometHashAggregate (18) + : +- ReusedExchange (17) + +- CometHashAggregate (26) + +- CometExchange (25) + +- CometHashAggregate (24) + +- CometHashAggregate (23) + +- ReusedExchange (22) + + +(1) CometScan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(3) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(4) CometFilter +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(5) CometProject +Input [2]: [d_date_sk#4, d_month_seq#5] +Arguments: [d_date_sk#4], [d_date_sk#4] + +(6) CometBroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: [d_date_sk#4] + +(7) CometBroadcastHashJoin +Left output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Right output [1]: [d_date_sk#4] +Arguments: [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + +(8) CometProject +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] +Arguments: [ws_item_sk#1, ws_net_paid#2], [ws_item_sk#1, ws_net_paid#2] + +(9) CometScan parquet spark_catalog.default.item +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(10) CometFilter +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(11) CometBroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [i_item_sk#6, i_class#7, i_category#8] + +(12) CometBroadcastHashJoin +Left output [2]: [ws_item_sk#1, ws_net_paid#2] +Right output [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_item_sk#1], [i_item_sk#6], Inner, BuildRight + +(13) CometProject +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] +Arguments: [ws_net_paid#2, i_class#7, i_category#8], [ws_net_paid#2, i_class#7, i_category#8] + +(14) CometHashAggregate +Input [3]: [ws_net_paid#2, i_class#7, i_category#8] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] + +(15) CometExchange +Input [3]: [i_category#8, i_class#7, sum#9] +Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [3]: [i_category#8, i_class#7, sum#9] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] + +(17) ReusedExchange [Reuses operator id: 15] +Output [3]: [i_category#10, i_class#11, sum#12] + +(18) CometHashAggregate +Input [3]: [i_category#10, i_class#11, sum#12] +Keys [2]: [i_category#10, i_class#11] +Functions [1]: [sum(UnscaledValue(ws_net_paid#13))] + +(19) CometHashAggregate +Input [2]: [total_sum#14, i_category#10] +Keys [1]: [i_category#10] +Functions [1]: [partial_sum(total_sum#14)] + +(20) CometExchange +Input [3]: [i_category#10, sum#15, isEmpty#16] +Arguments: hashpartitioning(i_category#10, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(21) CometHashAggregate +Input [3]: [i_category#10, sum#15, isEmpty#16] +Keys [1]: [i_category#10] +Functions [1]: [sum(total_sum#14)] + +(22) ReusedExchange [Reuses operator id: 15] +Output [3]: [i_category#17, i_class#18, sum#19] + +(23) CometHashAggregate +Input [3]: [i_category#17, i_class#18, sum#19] +Keys [2]: [i_category#17, i_class#18] +Functions [1]: [sum(UnscaledValue(ws_net_paid#20))] + +(24) CometHashAggregate +Input [1]: [total_sum#21] +Keys: [] +Functions [1]: [partial_sum(total_sum#21)] + +(25) CometExchange +Input [2]: [sum#22, isEmpty#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(26) CometHashAggregate +Input [2]: [sum#22, isEmpty#23] +Keys: [] +Functions [1]: [sum(total_sum#21)] + +(27) CometUnion +Child 0 Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Child 1 Input [6]: [total_sum#28, i_category#10, i_class#29, g_category#30, g_class#31, lochierarchy#32] +Child 2 Input [6]: [total_sum#33, i_category#34, i_class#35, g_category#36, g_class#37, lochierarchy#38] + +(28) CometHashAggregate +Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Keys [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Functions: [] + +(29) CometExchange +Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Arguments: hashpartitioning(total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=4] + +(30) CometHashAggregate +Input [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Keys [6]: [total_sum#24, i_category#8, i_class#7, g_category#25, g_class#26, lochierarchy#27] +Functions: [] + +(31) CometExchange +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] +Arguments: hashpartitioning(lochierarchy#27, _w0#39, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=5] + +(32) CometSort +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] +Arguments: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39], [lochierarchy#27 ASC NULLS FIRST, _w0#39 ASC NULLS FIRST, total_sum#24 DESC NULLS LAST] + +(33) ColumnarToRow [codegen id : 1] +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] + +(34) Window +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39] +Arguments: [rank(total_sum#24) windowspecdefinition(lochierarchy#27, _w0#39, total_sum#24 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#40], [lochierarchy#27, _w0#39], [total_sum#24 DESC NULLS LAST] + +(35) Project [codegen id : 2] +Output [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, rank_within_parent#40] +Input [6]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, _w0#39, rank_within_parent#40] + +(36) TakeOrderedAndProject +Input [5]: [total_sum#24, i_category#8, i_class#7, lochierarchy#27, rank_within_parent#40] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#40 ASC NULLS FIRST], [total_sum#24, i_category#8, i_class#7, lochierarchy#27, rank_within_parent#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..aa9c833020 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q86a.native_iceberg_compat/simplified.txt @@ -0,0 +1,40 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (2) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [total_sum,i_category,i_class,lochierarchy,_w0] + CometExchange [lochierarchy,_w0] #1 + CometHashAggregate [total_sum,i_category,i_class,lochierarchy,_w0,g_category,g_class] + CometExchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + CometUnion [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,sum(UnscaledValue(ws_net_paid))] + CometExchange [i_category,i_class] #3 + CometHashAggregate [i_category,i_class,sum,ws_net_paid] + CometProject [ws_net_paid,i_class,i_category] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,i_item_sk,i_class,i_category] + CometProject [ws_item_sk,ws_net_paid] + CometBroadcastHashJoin [ws_item_sk,ws_net_paid,ws_sold_date_sk,d_date_sk] + CometFilter [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometScan parquet spark_catalog.default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + CometBroadcastExchange [d_date_sk] #4 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_month_seq] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + CometBroadcastExchange [i_item_sk,i_class,i_category] #5 + CometFilter [i_item_sk,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty,sum(total_sum)] + CometExchange [i_category] #6 + CometHashAggregate [i_category,sum,isEmpty,total_sum] + CometHashAggregate [total_sum,i_category,i_class,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [i_category,i_class,sum] #3 + CometHashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty,sum(total_sum)] + CometExchange #7 + CometHashAggregate [sum,isEmpty,total_sum] + CometHashAggregate [total_sum,i_category,i_class,sum,sum(UnscaledValue(ws_net_paid))] + ReusedExchange [i_category,i_class,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/explain.txt new file mode 100644 index 0000000000..b2dbca0416 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/explain.txt @@ -0,0 +1,125 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometSort (23) + +- CometColumnarExchange (22) + +- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometNativeScan: `spark_catalog`.`default`.`store_sales` (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometNativeScan: `spark_catalog`.`default`.`item` (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometNativeScan: `spark_catalog`.`default`.`date_dim` (8) + + +(1) CometNativeScan: `spark_catalog`.`default`.`store_sales` +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Arguments: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometNativeScan: `spark_catalog`.`default`.`item` +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometNativeScan: `spark_catalog`.`default`.`date_dim` +Output [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10, d_date#11] + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] + +(20) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, _we0#15] + +(22) CometColumnarExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(23) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16], [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST] + +(24) ColumnarToRow [codegen id : 3] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ff36840a5f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_datafusion/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + CometColumnarExchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (2) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0] + CometExchange [i_class] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometNativeScan: `spark_catalog`.`default`.`store_sales` [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #4 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometNativeScan: `spark_catalog`.`default`.`item` [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometNativeScan: `spark_catalog`.`default`.`date_dim` [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0aeed17ce0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/explain.txt @@ -0,0 +1,135 @@ +== Physical Plan == +* ColumnarToRow (24) ++- CometSort (23) + +- CometColumnarExchange (22) + +- * Project (21) + +- Window (20) + +- * ColumnarToRow (19) + +- CometSort (18) + +- CometExchange (17) + +- CometHashAggregate (16) + +- CometExchange (15) + +- CometHashAggregate (14) + +- CometProject (13) + +- CometBroadcastHashJoin (12) + :- CometProject (7) + : +- CometBroadcastHashJoin (6) + : :- CometFilter (2) + : : +- CometScan parquet spark_catalog.default.store_sales (1) + : +- CometBroadcastExchange (5) + : +- CometFilter (4) + : +- CometScan parquet spark_catalog.default.item (3) + +- CometBroadcastExchange (11) + +- CometProject (10) + +- CometFilter (9) + +- CometScan parquet spark_catalog.default.date_dim (8) + + +(1) CometScan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) CometFilter +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(3) CometScan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(4) CometFilter +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(5) CometBroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) CometBroadcastHashJoin +Left output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Right output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_item_sk#1], [i_item_sk#4], Inner, BuildRight + +(7) CometProject +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(8) CometScan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(9) CometFilter +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(10) CometProject +Input [2]: [d_date_sk#10, d_date#11] +Arguments: [d_date_sk#10], [d_date_sk#10] + +(11) CometBroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: [d_date_sk#10] + +(12) CometBroadcastHashJoin +Left output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Right output [1]: [d_date_sk#10] +Arguments: [ss_sold_date_sk#3], [d_date_sk#10], Inner, BuildRight + +(13) CometProject +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] +Arguments: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9], [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(14) CometHashAggregate +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] + +(15) CometExchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=1] + +(16) CometHashAggregate +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#12] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] + +(17) CometExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=2] + +(18) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14], [i_class#8 ASC NULLS FIRST] + +(19) ColumnarToRow [codegen id : 1] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] + +(20) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14] +Arguments: [sum(_w0#14) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#15], [i_class#8] + +(21) Project [codegen id : 2] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, ((_w0#14 * 100) / _we0#15) AS revenueratio#16] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, _w0#14, _we0#15] + +(22) CometColumnarExchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, CometColumnarShuffle, [plan_id=3] + +(23) CometSort +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] +Arguments: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16], [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#16 ASC NULLS FIRST] + +(24) ColumnarToRow [codegen id : 3] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#13, revenueratio#16] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..97d4ee0d4f --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q98.native_iceberg_compat/simplified.txt @@ -0,0 +1,30 @@ +WholeStageCodegen (3) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + CometColumnarExchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (2) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (1) + ColumnarToRow + InputAdapter + CometSort [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0] + CometExchange [i_class] #2 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,sum,sum(UnscaledValue(ss_ext_sales_price))] + CometExchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + CometHashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum,ss_ext_sales_price] + CometProject [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category,d_date_sk] + CometProject [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastHashJoin [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometFilter [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + CometBroadcastExchange [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] #4 + CometFilter [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometScan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + CometBroadcastExchange [d_date_sk] #5 + CometProject [d_date_sk] + CometFilter [d_date_sk,d_date] + CometScan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/explain.txt new file mode 100644 index 0000000000..948656e6bb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/explain.txt @@ -0,0 +1,258 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (28) + : : +- * BroadcastHashJoin LeftSemi BuildRight (27) + : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : +- BroadcastExchange (26) + : : +- Union (25) + : : :- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (22) + : +- BroadcastExchange (33) + : +- * Project (32) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (39) + +- * Filter (38) + +- * ColumnarToRow (37) + +- Scan parquet spark_catalog.default.customer_demographics (36) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2002)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 7)) AND isnotnull(d_date_sk#6)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(10) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#4] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#11] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#9 AS customer_sk#12] +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#14)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] + +(22) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#15] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#13 AS customer_sk#16] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] + +(25) Union + +(26) BroadcastExchange +Input [1]: [customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(27) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#12] +Join type: LeftSemi +Join condition: None + +(28) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(29) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_county#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] + +(31) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#17)) + +(32) Project [codegen id : 7] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_county#18] + +(33) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(34) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] + +(36) Scan parquet spark_catalog.default.customer_demographics +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(38) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(39) BroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 9] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(42) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] + +(43) Exchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(44) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] + +(45) TakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..49aaf6d2b1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_datafusion/simplified.txt @@ -0,0 +1,68 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_county,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..948656e6bb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/explain.txt @@ -0,0 +1,258 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * HashAggregate (44) + +- Exchange (43) + +- * HashAggregate (42) + +- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (28) + : : +- * BroadcastHashJoin LeftSemi BuildRight (27) + : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : +- BroadcastExchange (26) + : : +- Union (25) + : : :- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (22) + : +- BroadcastExchange (33) + : +- * Project (32) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (39) + +- * Filter (38) + +- * ColumnarToRow (37) + +- Scan parquet spark_catalog.default.customer_demographics (36) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2002)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 7)) AND isnotnull(d_date_sk#6)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(10) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#4] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#11] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#9 AS customer_sk#12] +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#14)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] + +(22) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#15] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#13 AS customer_sk#16] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] + +(25) Union + +(26) BroadcastExchange +Input [1]: [customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(27) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customer_sk#12] +Join type: LeftSemi +Join condition: None + +(28) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(29) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_county#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] + +(31) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#17)) + +(32) Project [codegen id : 7] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_county#18] + +(33) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(34) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] + +(36) Scan parquet spark_catalog.default.customer_demographics +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(38) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) + +(39) BroadcastExchange +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 9] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(42) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] + +(43) Exchange +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(44) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] + +(45) TakeOrderedAndProject +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..49aaf6d2b1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.native_iceberg_compat/simplified.txt @@ -0,0 +1,68 @@ +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] + InputAdapter + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Project [ca_address_sk] + Filter [ca_county,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/explain.txt new file mode 100644 index 0000000000..d487982c71 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/explain.txt @@ -0,0 +1,418 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (55) + : +- * BroadcastHashJoin Inner BuildRight (54) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet spark_catalog.default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet spark_catalog.default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (61) + : +- * BroadcastHashJoin Inner BuildRight (60) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.customer (56) + : +- ReusedExchange (59) + +- ReusedExchange (62) + + +(1) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(7) BroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum#15] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17] +Results [2]: [c_customer_id#2 AS customer_id#18, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17,18,2) AS year_total#19] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#18, year_total#19] +Condition : (isnotnull(year_total#19) AND (year_total#19 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Condition : (isnotnull(c_customer_sk#20) AND isnotnull(c_customer_id#21)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [4]: [ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#20] +Right keys [1]: [ss_customer_sk#28] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Input [12]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#32, d_year#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] +Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2002)) AND isnotnull(d_date_sk#32)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#32, d_year#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#31] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Input [12]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31, d_date_sk#32, d_year#33] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum#34] +Results [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] + +(33) Exchange +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Arguments: hashpartitioning(c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17] +Results [5]: [c_customer_id#21 AS customer_id#36, c_first_name#22 AS customer_first_name#37, c_last_name#23 AS customer_last_name#38, c_email_address#27 AS customer_email_address#39, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17,18,2) AS year_total#40] + +(35) BroadcastExchange +Input [5]: [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#36] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] +Condition : (isnotnull(c_customer_sk#41) AND isnotnull(c_customer_id#42)) + +(40) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#52)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] + +(42) Filter [codegen id : 8] +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Condition : isnotnull(ws_bill_customer_sk#49) + +(43) BroadcastExchange +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#41] +Right keys [1]: [ws_bill_customer_sk#49] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Input [12]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#53, d_year#54] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#53] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, d_year#54] +Input [12]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52, d_date_sk#53, d_year#54] + +(49) HashAggregate [codegen id : 10] +Input [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, d_year#54] +Keys [8]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))] +Aggregate Attributes [1]: [sum#55] +Results [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] + +(50) Exchange +Input [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] +Arguments: hashpartitioning(c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] +Keys [8]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))#57] +Results [2]: [c_customer_id#42 AS customer_id#58, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))#57,18,2) AS year_total#59] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#58, year_total#59] +Condition : (isnotnull(year_total#59) AND (year_total#59 > 0.00)) + +(53) BroadcastExchange +Input [2]: [customer_id#58, year_total#59] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#58] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 16] +Output [8]: [customer_id#18, year_total#19, customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40, year_total#59] +Input [9]: [customer_id#18, year_total#19, customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40, customer_id#58, year_total#59] + +(56) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] + +(58) Filter [codegen id : 14] +Input [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] +Condition : (isnotnull(c_customer_sk#60) AND isnotnull(c_customer_id#61)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [4]: [ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#60] +Right keys [1]: [ws_bill_customer_sk#68] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] +Input [12]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#72, d_year#73] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#71] +Right keys [1]: [d_date_sk#72] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, d_year#73] +Input [12]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71, d_date_sk#72, d_year#73] + +(65) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, d_year#73] +Keys [8]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))] +Aggregate Attributes [1]: [sum#74] +Results [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] + +(66) Exchange +Input [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] +Arguments: hashpartitioning(c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] +Keys [8]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))#57] +Results [2]: [c_customer_id#61 AS customer_id#76, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))#57,18,2) AS year_total#77] + +(68) BroadcastExchange +Input [2]: [customer_id#76, year_total#77] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#76] +Join type: Inner +Join condition: (CASE WHEN (year_total#59 > 0.00) THEN (year_total#77 / year_total#59) ELSE 0E-20 END > CASE WHEN (year_total#19 > 0.00) THEN (year_total#40 / year_total#19) ELSE 0E-20 END) + +(70) Project [codegen id : 16] +Output [4]: [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39] +Input [10]: [customer_id#18, year_total#19, customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40, year_total#59, customer_id#76, year_total#77] + +(71) TakeOrderedAndProject +Input [4]: [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39] +Arguments: 100, [customer_id#36 ASC NULLS FIRST, customer_first_name#37 ASC NULLS FIRST, customer_last_name#38 ASC NULLS FIRST, customer_email_address#39 ASC NULLS FIRST], [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/simplified.txt new file mode 100644 index 0000000000..999922fcf3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_datafusion/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_email_address] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name,customer_email_address] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d487982c71 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/explain.txt @@ -0,0 +1,418 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (55) + : +- * BroadcastHashJoin Inner BuildRight (54) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet spark_catalog.default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet spark_catalog.default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (61) + : +- * BroadcastHashJoin Inner BuildRight (60) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.customer (56) + : +- ReusedExchange (59) + +- ReusedExchange (62) + + +(1) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_customer_sk#9) + +(7) BroadcastExchange +Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#9] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#13, d_year#14] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#14] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum#15] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#16] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#14, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17] +Results [2]: [c_customer_id#2 AS customer_id#18, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#11 - ss_ext_discount_amt#10)))#17,18,2) AS year_total#19] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#18, year_total#19] +Condition : (isnotnull(year_total#19) AND (year_total#19 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Condition : (isnotnull(c_customer_sk#20) AND isnotnull(c_customer_id#21)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [4]: [ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#20] +Right keys [1]: [ss_customer_sk#28] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] +Input [12]: [c_customer_sk#20, c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_customer_sk#28, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#32, d_year#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#32, d_year#33] +Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2002)) AND isnotnull(d_date_sk#32)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#32, d_year#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#31] +Right keys [1]: [d_date_sk#32] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Input [12]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, ss_sold_date_sk#31, d_date_sk#32, d_year#33] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#21, c_first_name#22, c_last_name#23, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, ss_ext_discount_amt#29, ss_ext_list_price#30, d_year#33] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [partial_sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum#34] +Results [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] + +(33) Exchange +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Arguments: hashpartitioning(c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27, sum#35] +Keys [8]: [c_customer_id#21, c_first_name#22, c_last_name#23, d_year#33, c_preferred_cust_flag#24, c_birth_country#25, c_login#26, c_email_address#27] +Functions [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17] +Results [5]: [c_customer_id#21 AS customer_id#36, c_first_name#22 AS customer_first_name#37, c_last_name#23 AS customer_last_name#38, c_email_address#27 AS customer_email_address#39, MakeDecimal(sum(UnscaledValue((ss_ext_list_price#30 - ss_ext_discount_amt#29)))#17,18,2) AS year_total#40] + +(35) BroadcastExchange +Input [5]: [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#36] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] +Condition : (isnotnull(c_customer_sk#41) AND isnotnull(c_customer_id#42)) + +(40) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#52)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] + +(42) Filter [codegen id : 8] +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Condition : isnotnull(ws_bill_customer_sk#49) + +(43) BroadcastExchange +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#41] +Right keys [1]: [ws_bill_customer_sk#49] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Input [12]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#53, d_year#54] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#53] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, d_year#54] +Input [12]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52, d_date_sk#53, d_year#54] + +(49) HashAggregate [codegen id : 10] +Input [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, d_year#54] +Keys [8]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))] +Aggregate Attributes [1]: [sum#55] +Results [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] + +(50) Exchange +Input [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] +Arguments: hashpartitioning(c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] +Keys [8]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))#57] +Results [2]: [c_customer_id#42 AS customer_id#58, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#51 - ws_ext_discount_amt#50)))#57,18,2) AS year_total#59] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#58, year_total#59] +Condition : (isnotnull(year_total#59) AND (year_total#59 > 0.00)) + +(53) BroadcastExchange +Input [2]: [customer_id#58, year_total#59] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#58] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 16] +Output [8]: [customer_id#18, year_total#19, customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40, year_total#59] +Input [9]: [customer_id#18, year_total#19, customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40, customer_id#58, year_total#59] + +(56) Scan parquet spark_catalog.default.customer +Output [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] + +(58) Filter [codegen id : 14] +Input [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] +Condition : (isnotnull(c_customer_sk#60) AND isnotnull(c_customer_id#61)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [4]: [ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#60] +Right keys [1]: [ws_bill_customer_sk#68] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] +Input [12]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#72, d_year#73] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#71] +Right keys [1]: [d_date_sk#72] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, d_year#73] +Input [12]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71, d_date_sk#72, d_year#73] + +(65) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, d_year#73] +Keys [8]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73] +Functions [1]: [partial_sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))] +Aggregate Attributes [1]: [sum#74] +Results [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] + +(66) Exchange +Input [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] +Arguments: hashpartitioning(c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] +Keys [8]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73] +Functions [1]: [sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))] +Aggregate Attributes [1]: [sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))#57] +Results [2]: [c_customer_id#61 AS customer_id#76, MakeDecimal(sum(UnscaledValue((ws_ext_list_price#70 - ws_ext_discount_amt#69)))#57,18,2) AS year_total#77] + +(68) BroadcastExchange +Input [2]: [customer_id#76, year_total#77] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#18] +Right keys [1]: [customer_id#76] +Join type: Inner +Join condition: (CASE WHEN (year_total#59 > 0.00) THEN (year_total#77 / year_total#59) ELSE 0E-20 END > CASE WHEN (year_total#19 > 0.00) THEN (year_total#40 / year_total#19) ELSE 0E-20 END) + +(70) Project [codegen id : 16] +Output [4]: [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39] +Input [10]: [customer_id#18, year_total#19, customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39, year_total#40, year_total#59, customer_id#76, year_total#77] + +(71) TakeOrderedAndProject +Input [4]: [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39] +Arguments: 100, [customer_id#36 ASC NULLS FIRST, customer_first_name#37 ASC NULLS FIRST, customer_last_name#38 ASC NULLS FIRST, customer_email_address#39 ASC NULLS FIRST], [customer_id#36, customer_first_name#37, customer_last_name#38, customer_email_address#39] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..999922fcf3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.native_iceberg_compat/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_email_address] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name,customer_email_address] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue((ss_ext_list_price - ss_ext_discount_amt))),customer_id,customer_first_name,customer_last_name,customer_email_address,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue((ws_ext_list_price - ws_ext_discount_amt))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/explain.txt new file mode 100644 index 0000000000..0369b95bc7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16] + +(20) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/simplified.txt new file mode 100644 index 0000000000..52d223848e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_datafusion/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..0369b95bc7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16] + +(20) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..52d223848e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.native_iceberg_compat/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_ext_sales_price,ws_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/explain.txt new file mode 100644 index 0000000000..9035879b93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/explain.txt @@ -0,0 +1,724 @@ +== Physical Plan == +TakeOrderedAndProject (96) ++- * BroadcastHashJoin Inner BuildRight (95) + :- * Filter (74) + : +- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (51) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : :- * HashAggregate (39) + : : : : +- Exchange (38) + : : : : +- * HashAggregate (37) + : : : : +- * Project (36) + : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : +- BroadcastExchange (31) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Project (28) + : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : +- BroadcastExchange (26) + : : : : : +- * Project (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (34) + : : : +- BroadcastExchange (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (42) + : : : : : +- * ColumnarToRow (41) + : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : +- ReusedExchange (43) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (61) + : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet spark_catalog.default.item (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (68) + : +- * Project (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.date_dim (64) + +- BroadcastExchange (94) + +- * Filter (93) + +- * HashAggregate (92) + +- Exchange (91) + +- * HashAggregate (90) + +- * Project (89) + +- * BroadcastHashJoin Inner BuildRight (88) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin LeftSemi BuildRight (79) + : : :- * Filter (77) + : : : +- * ColumnarToRow (76) + : : : +- Scan parquet spark_catalog.default.store_sales (75) + : : +- ReusedExchange (78) + : +- ReusedExchange (80) + +- BroadcastExchange (87) + +- * Project (86) + +- * Filter (85) + +- * ColumnarToRow (84) + +- Scan parquet spark_catalog.default.date_dim (83) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1998)) AND (d_year#22 <= 2000)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : (((isnotnull(i_item_sk#35) AND isnotnull(i_brand_id#36)) AND isnotnull(i_class_id#37)) AND isnotnull(i_category_id#38)) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#39, d_week_seq#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(66) Filter [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] +Condition : ((isnotnull(d_week_seq#40) AND (d_week_seq#40 = Subquery scalar-subquery#41, [id=#42])) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#43, isEmpty#44, count#45] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49, count(1)#50] +Results [6]: [store AS channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49 AS sales#52, count(1)#50 AS number_sales#53] + +(74) Filter [codegen id : 52] +Input [6]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53] +Condition : (isnotnull(sales#52) AND (cast(sales#52 as decimal(32,6)) > cast(Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(75) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#59)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] + +(77) Filter [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Condition : isnotnull(ss_item_sk#56) + +(78) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(79) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(80) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(81) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [i_item_sk#60] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 50] +Output [6]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [8]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(83) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#64, d_week_seq#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(85) Filter [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] +Condition : ((isnotnull(d_week_seq#65) AND (d_week_seq#65 = Subquery scalar-subquery#66, [id=#67])) AND isnotnull(d_date_sk#64)) + +(86) Project [codegen id : 49] +Output [1]: [d_date_sk#64] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(87) BroadcastExchange +Input [1]: [d_date_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(88) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#59] +Right keys [1]: [d_date_sk#64] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 50] +Output [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [7]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, d_date_sk#64] + +(90) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [partial_sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), partial_count(1)] +Aggregate Attributes [3]: [sum#68, isEmpty#69, count#70] +Results [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] + +(91) Exchange +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(92) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74, count(1)#75] +Results [6]: [store AS channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74 AS sales#77, count(1)#75 AS number_sales#78] + +(93) Filter [codegen id : 51] +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Condition : (isnotnull(sales#77) AND (cast(sales#77 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(94) BroadcastExchange +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Right keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Join type: Inner +Join condition: None + +(96) TakeOrderedAndProject +Input [12]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: 100, [i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#54, [id=#55] +* HashAggregate (115) ++- Exchange (114) + +- * HashAggregate (113) + +- Union (112) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * ColumnarToRow (98) + : : +- Scan parquet spark_catalog.default.store_sales (97) + : +- ReusedExchange (99) + :- * Project (106) + : +- * BroadcastHashJoin Inner BuildRight (105) + : :- * ColumnarToRow (103) + : : +- Scan parquet spark_catalog.default.catalog_sales (102) + : +- ReusedExchange (104) + +- * Project (111) + +- * BroadcastHashJoin Inner BuildRight (110) + :- * ColumnarToRow (108) + : +- Scan parquet spark_catalog.default.web_sales (107) + +- ReusedExchange (109) + + +(97) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#81)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] + +(99) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#82] + +(100) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#81] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 2] +Output [2]: [ss_quantity#79 AS quantity#83, ss_list_price#80 AS list_price#84] +Input [4]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81, d_date_sk#82] + +(102) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#87)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] + +(104) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#88] + +(105) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#87] +Right keys [1]: [d_date_sk#88] +Join type: Inner +Join condition: None + +(106) Project [codegen id : 4] +Output [2]: [cs_quantity#85 AS quantity#89, cs_list_price#86 AS list_price#90] +Input [4]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87, d_date_sk#88] + +(107) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#93)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] + +(109) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#94] + +(110) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#93] +Right keys [1]: [d_date_sk#94] +Join type: Inner +Join condition: None + +(111) Project [codegen id : 6] +Output [2]: [ws_quantity#91 AS quantity#95, ws_list_price#92 AS list_price#96] +Input [4]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93, d_date_sk#94] + +(112) Union + +(113) HashAggregate [codegen id : 7] +Input [2]: [quantity#83, list_price#84] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [2]: [sum#97, count#98] +Results [2]: [sum#99, count#100] + +(114) Exchange +Input [2]: [sum#99, count#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(115) HashAggregate [codegen id : 8] +Input [2]: [sum#99, count#100] +Keys: [] +Functions [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101] +Results [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101 AS average_sales#102] + +Subquery:2 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#41, [id=#42] +* Project (119) ++- * Filter (118) + +- * ColumnarToRow (117) + +- Scan parquet spark_catalog.default.date_dim (116) + + +(116) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +(118) Filter [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Condition : (((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND isnotnull(d_dom#106)) AND (d_year#104 = 1999)) AND (d_moy#105 = 12)) AND (d_dom#106 = 16)) + +(119) Project [codegen id : 1] +Output [1]: [d_week_seq#103] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +Subquery:3 Hosting operator id = 93 Hosting Expression = ReusedSubquery Subquery scalar-subquery#54, [id=#55] + +Subquery:4 Hosting operator id = 85 Hosting Expression = Subquery scalar-subquery#66, [id=#67] +* Project (123) ++- * Filter (122) + +- * ColumnarToRow (121) + +- Scan parquet spark_catalog.default.date_dim (120) + + +(120) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(121) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + +(122) Filter [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Condition : (((((isnotnull(d_year#108) AND isnotnull(d_moy#109)) AND isnotnull(d_dom#110)) AND (d_year#108 = 1998)) AND (d_moy#109 = 12)) AND (d_dom#110 = 16)) + +(123) Project [codegen id : 1] +Output [1]: [d_week_seq#107] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/simplified.txt new file mode 100644 index 0000000000..b9a845cb11 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_datafusion/simplified.txt @@ -0,0 +1,191 @@ +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (51) + Filter [sales] + ReusedSubquery [average_sales] #2 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (49) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9035879b93 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/explain.txt @@ -0,0 +1,724 @@ +== Physical Plan == +TakeOrderedAndProject (96) ++- * BroadcastHashJoin Inner BuildRight (95) + :- * Filter (74) + : +- * HashAggregate (73) + : +- Exchange (72) + : +- * HashAggregate (71) + : +- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (54) + : : : +- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (51) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : :- * HashAggregate (39) + : : : : +- Exchange (38) + : : : : +- * HashAggregate (37) + : : : : +- * Project (36) + : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * Project (33) + : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : :- * Filter (9) + : : : : : : +- * ColumnarToRow (8) + : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : +- BroadcastExchange (31) + : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : :- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Project (28) + : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : :- * Project (21) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : :- * Filter (15) + : : : : : : : +- * ColumnarToRow (14) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : +- BroadcastExchange (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : +- BroadcastExchange (26) + : : : : : +- * Project (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : +- ReusedExchange (34) + : : : +- BroadcastExchange (49) + : : : +- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Filter (42) + : : : : : +- * ColumnarToRow (41) + : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : +- ReusedExchange (43) + : : : +- ReusedExchange (46) + : : +- BroadcastExchange (61) + : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet spark_catalog.default.item (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (68) + : +- * Project (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.date_dim (64) + +- BroadcastExchange (94) + +- * Filter (93) + +- * HashAggregate (92) + +- Exchange (91) + +- * HashAggregate (90) + +- * Project (89) + +- * BroadcastHashJoin Inner BuildRight (88) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin LeftSemi BuildRight (79) + : : :- * Filter (77) + : : : +- * ColumnarToRow (76) + : : : +- Scan parquet spark_catalog.default.store_sales (75) + : : +- ReusedExchange (78) + : +- ReusedExchange (80) + +- BroadcastExchange (87) + +- * Project (86) + +- * Filter (85) + +- * ColumnarToRow (84) + +- Scan parquet spark_catalog.default.date_dim (83) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1998)) AND (d_year#22 <= 2000)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : (((isnotnull(i_item_sk#35) AND isnotnull(i_brand_id#36)) AND isnotnull(i_class_id#37)) AND isnotnull(i_category_id#38)) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#39, d_week_seq#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(66) Filter [codegen id : 24] +Input [2]: [d_date_sk#39, d_week_seq#40] +Condition : ((isnotnull(d_week_seq#40) AND (d_week_seq#40 = Subquery scalar-subquery#41, [id=#42])) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [2]: [d_date_sk#39, d_week_seq#40] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#43, isEmpty#44, count#45] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#46, isEmpty#47, count#48] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49, count(1)#50] +Results [6]: [store AS channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#49 AS sales#52, count(1)#50 AS number_sales#53] + +(74) Filter [codegen id : 52] +Input [6]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53] +Condition : (isnotnull(sales#52) AND (cast(sales#52 as decimal(32,6)) > cast(Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(75) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#59)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] + +(77) Filter [codegen id : 50] +Input [4]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59] +Condition : isnotnull(ss_item_sk#56) + +(78) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(79) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(80) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(81) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#56] +Right keys [1]: [i_item_sk#60] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 50] +Output [6]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [8]: [ss_item_sk#56, ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] + +(83) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#64, d_week_seq#65] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(85) Filter [codegen id : 49] +Input [2]: [d_date_sk#64, d_week_seq#65] +Condition : ((isnotnull(d_week_seq#65) AND (d_week_seq#65 = Subquery scalar-subquery#66, [id=#67])) AND isnotnull(d_date_sk#64)) + +(86) Project [codegen id : 49] +Output [1]: [d_date_sk#64] +Input [2]: [d_date_sk#64, d_week_seq#65] + +(87) BroadcastExchange +Input [1]: [d_date_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(88) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#59] +Right keys [1]: [d_date_sk#64] +Join type: Inner +Join condition: None + +(89) Project [codegen id : 50] +Output [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [7]: [ss_quantity#57, ss_list_price#58, ss_sold_date_sk#59, i_brand_id#61, i_class_id#62, i_category_id#63, d_date_sk#64] + +(90) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#57, ss_list_price#58, i_brand_id#61, i_class_id#62, i_category_id#63] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [partial_sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), partial_count(1)] +Aggregate Attributes [3]: [sum#68, isEmpty#69, count#70] +Results [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] + +(91) Exchange +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(92) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#71, isEmpty#72, count#73] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74, count(1)#75] +Results [6]: [store AS channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sum((cast(ss_quantity#57 as decimal(10,0)) * ss_list_price#58))#74 AS sales#77, count(1)#75 AS number_sales#78] + +(93) Filter [codegen id : 51] +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Condition : (isnotnull(sales#77) AND (cast(sales#77 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#54, [id=#55] as decimal(32,6)))) + +(94) BroadcastExchange +Input [6]: [channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Right keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Join type: Inner +Join condition: None + +(96) TakeOrderedAndProject +Input [12]: [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] +Arguments: 100, [i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#51, i_brand_id#36, i_class_id#37, i_category_id#38, sales#52, number_sales#53, channel#76, i_brand_id#61, i_class_id#62, i_category_id#63, sales#77, number_sales#78] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#54, [id=#55] +* HashAggregate (115) ++- Exchange (114) + +- * HashAggregate (113) + +- Union (112) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * ColumnarToRow (98) + : : +- Scan parquet spark_catalog.default.store_sales (97) + : +- ReusedExchange (99) + :- * Project (106) + : +- * BroadcastHashJoin Inner BuildRight (105) + : :- * ColumnarToRow (103) + : : +- Scan parquet spark_catalog.default.catalog_sales (102) + : +- ReusedExchange (104) + +- * Project (111) + +- * BroadcastHashJoin Inner BuildRight (110) + :- * ColumnarToRow (108) + : +- Scan parquet spark_catalog.default.web_sales (107) + +- ReusedExchange (109) + + +(97) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#81)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81] + +(99) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#82] + +(100) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#81] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 2] +Output [2]: [ss_quantity#79 AS quantity#83, ss_list_price#80 AS list_price#84] +Input [4]: [ss_quantity#79, ss_list_price#80, ss_sold_date_sk#81, d_date_sk#82] + +(102) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#87)] +ReadSchema: struct + +(103) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87] + +(104) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#88] + +(105) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#87] +Right keys [1]: [d_date_sk#88] +Join type: Inner +Join condition: None + +(106) Project [codegen id : 4] +Output [2]: [cs_quantity#85 AS quantity#89, cs_list_price#86 AS list_price#90] +Input [4]: [cs_quantity#85, cs_list_price#86, cs_sold_date_sk#87, d_date_sk#88] + +(107) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#93)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93] + +(109) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#94] + +(110) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#93] +Right keys [1]: [d_date_sk#94] +Join type: Inner +Join condition: None + +(111) Project [codegen id : 6] +Output [2]: [ws_quantity#91 AS quantity#95, ws_list_price#92 AS list_price#96] +Input [4]: [ws_quantity#91, ws_list_price#92, ws_sold_date_sk#93, d_date_sk#94] + +(112) Union + +(113) HashAggregate [codegen id : 7] +Input [2]: [quantity#83, list_price#84] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [2]: [sum#97, count#98] +Results [2]: [sum#99, count#100] + +(114) Exchange +Input [2]: [sum#99, count#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(115) HashAggregate [codegen id : 8] +Input [2]: [sum#99, count#100] +Keys: [] +Functions [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))] +Aggregate Attributes [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101] +Results [1]: [avg((cast(quantity#83 as decimal(10,0)) * list_price#84))#101 AS average_sales#102] + +Subquery:2 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#41, [id=#42] +* Project (119) ++- * Filter (118) + +- * ColumnarToRow (117) + +- Scan parquet spark_catalog.default.date_dim (116) + + +(116) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +(118) Filter [codegen id : 1] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] +Condition : (((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND isnotnull(d_dom#106)) AND (d_year#104 = 1999)) AND (d_moy#105 = 12)) AND (d_dom#106 = 16)) + +(119) Project [codegen id : 1] +Output [1]: [d_week_seq#103] +Input [4]: [d_week_seq#103, d_year#104, d_moy#105, d_dom#106] + +Subquery:3 Hosting operator id = 93 Hosting Expression = ReusedSubquery Subquery scalar-subquery#54, [id=#55] + +Subquery:4 Hosting operator id = 85 Hosting Expression = Subquery scalar-subquery#66, [id=#67] +* Project (123) ++- * Filter (122) + +- * ColumnarToRow (121) + +- Scan parquet spark_catalog.default.date_dim (120) + + +(120) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] +ReadSchema: struct + +(121) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + +(122) Filter [codegen id : 1] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] +Condition : (((((isnotnull(d_year#108) AND isnotnull(d_moy#109)) AND isnotnull(d_dom#110)) AND (d_year#108 = 1998)) AND (d_moy#109 = 12)) AND (d_dom#110 = 16)) + +(123) Project [codegen id : 1] +Output [1]: [d_week_seq#107] +Input [4]: [d_week_seq#107, d_year#108, d_moy#109, d_dom#110] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..b9a845cb11 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.native_iceberg_compat/simplified.txt @@ -0,0 +1,191 @@ +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [sales] + Subquery #2 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #8 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (51) + Filter [sales] + ReusedSubquery [average_sales] #2 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #14 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (49) + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #3 + WholeStageCodegen (1) + Project [d_week_seq] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/explain.txt new file mode 100644 index 0000000000..5f5d49ab21 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/explain.txt @@ -0,0 +1,931 @@ +== Physical Plan == +TakeOrderedAndProject (133) ++- * HashAggregate (132) + +- Exchange (131) + +- * HashAggregate (130) + +- Union (129) + :- * HashAggregate (108) + : +- Exchange (107) + : +- * HashAggregate (106) + : +- Union (105) + : :- * Filter (74) + : : +- * HashAggregate (73) + : : +- Exchange (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * BroadcastHashJoin Inner BuildRight (69) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (54) + : : : : +- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.item (4) + : : : : +- BroadcastExchange (51) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : : :- * HashAggregate (39) + : : : : : +- Exchange (38) + : : : : : +- * HashAggregate (37) + : : : : : +- * Project (36) + : : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : : :- * Project (33) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : : +- BroadcastExchange (31) + : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : : :- * Filter (12) + : : : : : : : +- * ColumnarToRow (11) + : : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : : +- BroadcastExchange (29) + : : : : : : +- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Filter (15) + : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : : +- ReusedExchange (34) + : : : : +- BroadcastExchange (49) + : : : : +- * Project (48) + : : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : : :- * Project (45) + : : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : : :- * Filter (42) + : : : : : : +- * ColumnarToRow (41) + : : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : : +- ReusedExchange (43) + : : : : +- ReusedExchange (46) + : : : +- BroadcastExchange (61) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet spark_catalog.default.item (56) + : : : +- ReusedExchange (59) + : : +- BroadcastExchange (68) + : : +- * Project (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet spark_catalog.default.date_dim (64) + : :- * Filter (89) + : : +- * HashAggregate (88) + : : +- Exchange (87) + : : +- * HashAggregate (86) + : : +- * Project (85) + : : +- * BroadcastHashJoin Inner BuildRight (84) + : : :- * Project (82) + : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (79) + : : : : :- * Filter (77) + : : : : : +- * ColumnarToRow (76) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (75) + : : : : +- ReusedExchange (78) + : : : +- ReusedExchange (80) + : : +- ReusedExchange (83) + : +- * Filter (104) + : +- * HashAggregate (103) + : +- Exchange (102) + : +- * HashAggregate (101) + : +- * Project (100) + : +- * BroadcastHashJoin Inner BuildRight (99) + : :- * Project (97) + : : +- * BroadcastHashJoin Inner BuildRight (96) + : : :- * BroadcastHashJoin LeftSemi BuildRight (94) + : : : :- * Filter (92) + : : : : +- * ColumnarToRow (91) + : : : : +- Scan parquet spark_catalog.default.web_sales (90) + : : : +- ReusedExchange (93) + : : +- ReusedExchange (95) + : +- ReusedExchange (98) + :- * HashAggregate (113) + : +- Exchange (112) + : +- * HashAggregate (111) + : +- * HashAggregate (110) + : +- ReusedExchange (109) + :- * HashAggregate (118) + : +- Exchange (117) + : +- * HashAggregate (116) + : +- * HashAggregate (115) + : +- ReusedExchange (114) + :- * HashAggregate (123) + : +- Exchange (122) + : +- * HashAggregate (121) + : +- * HashAggregate (120) + : +- ReusedExchange (119) + +- * HashAggregate (128) + +- Exchange (127) + +- * HashAggregate (126) + +- * HashAggregate (125) + +- ReusedExchange (124) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : isnotnull(i_item_sk#35) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#39, d_year#40, d_moy#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(66) Filter [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_year#40) AND isnotnull(d_moy#41)) AND (d_year#40 = 2000)) AND (d_moy#41 = 11)) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#42, isEmpty#43, count#44] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48, count(1)#49] +Results [6]: [store AS channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48 AS sales#51, count(1)#49 AS number_sales#52] + +(74) Filter [codegen id : 26] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sales#51, number_sales#52] +Condition : (isnotnull(sales#51) AND (cast(sales#51 as decimal(32,6)) > cast(Subquery scalar-subquery#53, [id=#54] as decimal(32,6)))) + +(75) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#58)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 51] +Input [4]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58] + +(77) Filter [codegen id : 51] +Input [4]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58] +Condition : isnotnull(cs_item_sk#55) + +(78) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(79) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#55] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(80) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] + +(81) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#55] +Right keys [1]: [i_item_sk#59] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 51] +Output [6]: [cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [8]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] + +(83) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#63] + +(84) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#58] +Right keys [1]: [d_date_sk#63] +Join type: Inner +Join condition: None + +(85) Project [codegen id : 51] +Output [5]: [cs_quantity#56, cs_list_price#57, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [7]: [cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58, i_brand_id#60, i_class_id#61, i_category_id#62, d_date_sk#63] + +(86) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#56, cs_list_price#57, i_brand_id#60, i_class_id#61, i_category_id#62] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [partial_sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57)), partial_count(1)] +Aggregate Attributes [3]: [sum#64, isEmpty#65, count#66] +Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] + +(87) Exchange +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(88) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57)), count(1)] +Aggregate Attributes [2]: [sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57))#70, count(1)#71] +Results [6]: [catalog AS channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57))#70 AS sales#73, count(1)#71 AS number_sales#74] + +(89) Filter [codegen id : 52] +Input [6]: [channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Condition : (isnotnull(sales#73) AND (cast(sales#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#53, [id=#54] as decimal(32,6)))) + +(90) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#78)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(91) ColumnarToRow [codegen id : 77] +Input [4]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78] + +(92) Filter [codegen id : 77] +Input [4]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78] +Condition : isnotnull(ws_item_sk#75) + +(93) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(94) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#75] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(95) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] + +(96) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#75] +Right keys [1]: [i_item_sk#79] +Join type: Inner +Join condition: None + +(97) Project [codegen id : 77] +Output [6]: [ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [8]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78, i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] + +(98) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#83] + +(99) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#78] +Right keys [1]: [d_date_sk#83] +Join type: Inner +Join condition: None + +(100) Project [codegen id : 77] +Output [5]: [ws_quantity#76, ws_list_price#77, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [7]: [ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78, i_brand_id#80, i_class_id#81, i_category_id#82, d_date_sk#83] + +(101) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#76, ws_list_price#77, i_brand_id#80, i_class_id#81, i_category_id#82] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [partial_sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77)), partial_count(1)] +Aggregate Attributes [3]: [sum#84, isEmpty#85, count#86] +Results [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#87, isEmpty#88, count#89] + +(102) Exchange +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#87, isEmpty#88, count#89] +Arguments: hashpartitioning(i_brand_id#80, i_class_id#81, i_category_id#82, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(103) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#87, isEmpty#88, count#89] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77)), count(1)] +Aggregate Attributes [2]: [sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77))#90, count(1)#91] +Results [6]: [web AS channel#92, i_brand_id#80, i_class_id#81, i_category_id#82, sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77))#90 AS sales#93, count(1)#91 AS number_sales#94] + +(104) Filter [codegen id : 78] +Input [6]: [channel#92, i_brand_id#80, i_class_id#81, i_category_id#82, sales#93, number_sales#94] +Condition : (isnotnull(sales#93) AND (cast(sales#93 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#53, [id=#54] as decimal(32,6)))) + +(105) Union + +(106) HashAggregate [codegen id : 79] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sales#51, number_sales#52] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum(sales#51), partial_sum(number_sales#52)] +Aggregate Attributes [3]: [sum#95, isEmpty#96, sum#97] +Results [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(107) Exchange +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Arguments: hashpartitioning(channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(108) HashAggregate [codegen id : 80] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(109) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(110) HashAggregate [codegen id : 160] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [5]: [channel#50, i_brand_id#36, i_class_id#37, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(111) HashAggregate [codegen id : 160] +Input [5]: [channel#50, i_brand_id#36, i_class_id#37, sum_sales#103, number_sales#104] +Keys [3]: [channel#50, i_brand_id#36, i_class_id#37] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#105, isEmpty#106, sum#107] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, sum#108, isEmpty#109, sum#110] + +(112) Exchange +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, sum#108, isEmpty#109, sum#110] +Arguments: hashpartitioning(channel#50, i_brand_id#36, i_class_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(113) HashAggregate [codegen id : 161] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, sum#108, isEmpty#109, sum#110] +Keys [3]: [channel#50, i_brand_id#36, i_class_id#37] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#111, sum(number_sales#104)#112] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, null AS i_category_id#113, sum(sum_sales#103)#111 AS sum(sum_sales)#114, sum(number_sales#104)#112 AS sum(number_sales)#115] + +(114) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(115) HashAggregate [codegen id : 241] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [4]: [channel#50, i_brand_id#36, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(116) HashAggregate [codegen id : 241] +Input [4]: [channel#50, i_brand_id#36, sum_sales#103, number_sales#104] +Keys [2]: [channel#50, i_brand_id#36] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#116, isEmpty#117, sum#118] +Results [5]: [channel#50, i_brand_id#36, sum#119, isEmpty#120, sum#121] + +(117) Exchange +Input [5]: [channel#50, i_brand_id#36, sum#119, isEmpty#120, sum#121] +Arguments: hashpartitioning(channel#50, i_brand_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(118) HashAggregate [codegen id : 242] +Input [5]: [channel#50, i_brand_id#36, sum#119, isEmpty#120, sum#121] +Keys [2]: [channel#50, i_brand_id#36] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#122, sum(number_sales#104)#123] +Results [6]: [channel#50, i_brand_id#36, null AS i_class_id#124, null AS i_category_id#125, sum(sum_sales#103)#122 AS sum(sum_sales)#126, sum(number_sales#104)#123 AS sum(number_sales)#127] + +(119) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(120) HashAggregate [codegen id : 322] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [3]: [channel#50, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(121) HashAggregate [codegen id : 322] +Input [3]: [channel#50, sum_sales#103, number_sales#104] +Keys [1]: [channel#50] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#128, isEmpty#129, sum#130] +Results [4]: [channel#50, sum#131, isEmpty#132, sum#133] + +(122) Exchange +Input [4]: [channel#50, sum#131, isEmpty#132, sum#133] +Arguments: hashpartitioning(channel#50, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(123) HashAggregate [codegen id : 323] +Input [4]: [channel#50, sum#131, isEmpty#132, sum#133] +Keys [1]: [channel#50] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#134, sum(number_sales#104)#135] +Results [6]: [channel#50, null AS i_brand_id#136, null AS i_class_id#137, null AS i_category_id#138, sum(sum_sales#103)#134 AS sum(sum_sales)#139, sum(number_sales#104)#135 AS sum(number_sales)#140] + +(124) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(125) HashAggregate [codegen id : 403] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [2]: [sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(126) HashAggregate [codegen id : 403] +Input [2]: [sum_sales#103, number_sales#104] +Keys: [] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#141, isEmpty#142, sum#143] +Results [3]: [sum#144, isEmpty#145, sum#146] + +(127) Exchange +Input [3]: [sum#144, isEmpty#145, sum#146] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] + +(128) HashAggregate [codegen id : 404] +Input [3]: [sum#144, isEmpty#145, sum#146] +Keys: [] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#147, sum(number_sales#104)#148] +Results [6]: [null AS channel#149, null AS i_brand_id#150, null AS i_class_id#151, null AS i_category_id#152, sum(sum_sales#103)#147 AS sum(sum_sales)#153, sum(number_sales#104)#148 AS sum(number_sales)#154] + +(129) Union + +(130) HashAggregate [codegen id : 405] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Keys [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] + +(131) Exchange +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Arguments: hashpartitioning(channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(132) HashAggregate [codegen id : 406] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Keys [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] + +(133) TakeOrderedAndProject +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Arguments: 100, [channel#50 ASC NULLS FIRST, i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#53, [id=#54] +* HashAggregate (156) ++- Exchange (155) + +- * HashAggregate (154) + +- Union (153) + :- * Project (138) + : +- * BroadcastHashJoin Inner BuildRight (137) + : :- * ColumnarToRow (135) + : : +- Scan parquet spark_catalog.default.store_sales (134) + : +- ReusedExchange (136) + :- * Project (147) + : +- * BroadcastHashJoin Inner BuildRight (146) + : :- * ColumnarToRow (140) + : : +- Scan parquet spark_catalog.default.catalog_sales (139) + : +- BroadcastExchange (145) + : +- * Project (144) + : +- * Filter (143) + : +- * ColumnarToRow (142) + : +- Scan parquet spark_catalog.default.date_dim (141) + +- * Project (152) + +- * BroadcastHashJoin Inner BuildRight (151) + :- * ColumnarToRow (149) + : +- Scan parquet spark_catalog.default.web_sales (148) + +- ReusedExchange (150) + + +(134) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#155, ss_list_price#156, ss_sold_date_sk#157] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#157)] +ReadSchema: struct + +(135) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#155, ss_list_price#156, ss_sold_date_sk#157] + +(136) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#158] + +(137) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#157] +Right keys [1]: [d_date_sk#158] +Join type: Inner +Join condition: None + +(138) Project [codegen id : 2] +Output [2]: [ss_quantity#155 AS quantity#159, ss_list_price#156 AS list_price#160] +Input [4]: [ss_quantity#155, ss_list_price#156, ss_sold_date_sk#157, d_date_sk#158] + +(139) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#161, cs_list_price#162, cs_sold_date_sk#163] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#163)] +ReadSchema: struct + +(140) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#161, cs_list_price#162, cs_sold_date_sk#163] + +(141) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#164, d_year#165] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(142) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#164, d_year#165] + +(143) Filter [codegen id : 3] +Input [2]: [d_date_sk#164, d_year#165] +Condition : (((isnotnull(d_year#165) AND (d_year#165 >= 1998)) AND (d_year#165 <= 2000)) AND isnotnull(d_date_sk#164)) + +(144) Project [codegen id : 3] +Output [1]: [d_date_sk#164] +Input [2]: [d_date_sk#164, d_year#165] + +(145) BroadcastExchange +Input [1]: [d_date_sk#164] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] + +(146) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#163] +Right keys [1]: [d_date_sk#164] +Join type: Inner +Join condition: None + +(147) Project [codegen id : 4] +Output [2]: [cs_quantity#161 AS quantity#166, cs_list_price#162 AS list_price#167] +Input [4]: [cs_quantity#161, cs_list_price#162, cs_sold_date_sk#163, d_date_sk#164] + +(148) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#168, ws_list_price#169, ws_sold_date_sk#170] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#170)] +ReadSchema: struct + +(149) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#168, ws_list_price#169, ws_sold_date_sk#170] + +(150) ReusedExchange [Reuses operator id: 145] +Output [1]: [d_date_sk#171] + +(151) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#170] +Right keys [1]: [d_date_sk#171] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 6] +Output [2]: [ws_quantity#168 AS quantity#172, ws_list_price#169 AS list_price#173] +Input [4]: [ws_quantity#168, ws_list_price#169, ws_sold_date_sk#170, d_date_sk#171] + +(153) Union + +(154) HashAggregate [codegen id : 7] +Input [2]: [quantity#159, list_price#160] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#159 as decimal(10,0)) * list_price#160))] +Aggregate Attributes [2]: [sum#174, count#175] +Results [2]: [sum#176, count#177] + +(155) Exchange +Input [2]: [sum#176, count#177] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=21] + +(156) HashAggregate [codegen id : 8] +Input [2]: [sum#176, count#177] +Keys: [] +Functions [1]: [avg((cast(quantity#159 as decimal(10,0)) * list_price#160))] +Aggregate Attributes [1]: [avg((cast(quantity#159 as decimal(10,0)) * list_price#160))#178] +Results [1]: [avg((cast(quantity#159 as decimal(10,0)) * list_price#160))#178 AS average_sales#179] + +Subquery:2 Hosting operator id = 89 Hosting Expression = ReusedSubquery Subquery scalar-subquery#53, [id=#54] + +Subquery:3 Hosting operator id = 104 Hosting Expression = ReusedSubquery Subquery scalar-subquery#53, [id=#54] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..be14bd5141 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_datafusion/simplified.txt @@ -0,0 +1,249 @@ +TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (406) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 + WholeStageCodegen (405) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + InputAdapter + Union + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (26) + Filter [sales] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #14 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #10 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #15 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #3 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] #10 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (78) + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #17 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (161) + HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #18 + WholeStageCodegen (160) + HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + WholeStageCodegen (242) + HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id] #19 + WholeStageCodegen (241) + HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + WholeStageCodegen (323) + HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel] #20 + WholeStageCodegen (322) + HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + WholeStageCodegen (404) + HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange #21 + WholeStageCodegen (403) + HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..5f5d49ab21 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/explain.txt @@ -0,0 +1,931 @@ +== Physical Plan == +TakeOrderedAndProject (133) ++- * HashAggregate (132) + +- Exchange (131) + +- * HashAggregate (130) + +- Union (129) + :- * HashAggregate (108) + : +- Exchange (107) + : +- * HashAggregate (106) + : +- Union (105) + : :- * Filter (74) + : : +- * HashAggregate (73) + : : +- Exchange (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * BroadcastHashJoin Inner BuildRight (69) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (55) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (54) + : : : : +- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.item (4) + : : : : +- BroadcastExchange (51) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (50) + : : : : :- * HashAggregate (39) + : : : : : +- Exchange (38) + : : : : : +- * HashAggregate (37) + : : : : : +- * Project (36) + : : : : : +- * BroadcastHashJoin Inner BuildRight (35) + : : : : : :- * Project (33) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (7) + : : : : : : +- BroadcastExchange (31) + : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (30) + : : : : : : :- * Filter (12) + : : : : : : : +- * ColumnarToRow (11) + : : : : : : : +- Scan parquet spark_catalog.default.item (10) + : : : : : : +- BroadcastExchange (29) + : : : : : : +- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Filter (15) + : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet spark_catalog.default.date_dim (22) + : : : : : +- ReusedExchange (34) + : : : : +- BroadcastExchange (49) + : : : : +- * Project (48) + : : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : : :- * Project (45) + : : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : : :- * Filter (42) + : : : : : : +- * ColumnarToRow (41) + : : : : : : +- Scan parquet spark_catalog.default.web_sales (40) + : : : : : +- ReusedExchange (43) + : : : : +- ReusedExchange (46) + : : : +- BroadcastExchange (61) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet spark_catalog.default.item (56) + : : : +- ReusedExchange (59) + : : +- BroadcastExchange (68) + : : +- * Project (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet spark_catalog.default.date_dim (64) + : :- * Filter (89) + : : +- * HashAggregate (88) + : : +- Exchange (87) + : : +- * HashAggregate (86) + : : +- * Project (85) + : : +- * BroadcastHashJoin Inner BuildRight (84) + : : :- * Project (82) + : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (79) + : : : : :- * Filter (77) + : : : : : +- * ColumnarToRow (76) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (75) + : : : : +- ReusedExchange (78) + : : : +- ReusedExchange (80) + : : +- ReusedExchange (83) + : +- * Filter (104) + : +- * HashAggregate (103) + : +- Exchange (102) + : +- * HashAggregate (101) + : +- * Project (100) + : +- * BroadcastHashJoin Inner BuildRight (99) + : :- * Project (97) + : : +- * BroadcastHashJoin Inner BuildRight (96) + : : :- * BroadcastHashJoin LeftSemi BuildRight (94) + : : : :- * Filter (92) + : : : : +- * ColumnarToRow (91) + : : : : +- Scan parquet spark_catalog.default.web_sales (90) + : : : +- ReusedExchange (93) + : : +- ReusedExchange (95) + : +- ReusedExchange (98) + :- * HashAggregate (113) + : +- Exchange (112) + : +- * HashAggregate (111) + : +- * HashAggregate (110) + : +- ReusedExchange (109) + :- * HashAggregate (118) + : +- Exchange (117) + : +- * HashAggregate (116) + : +- * HashAggregate (115) + : +- ReusedExchange (114) + :- * HashAggregate (123) + : +- Exchange (122) + : +- * HashAggregate (121) + : +- * HashAggregate (120) + : +- ReusedExchange (119) + +- * HashAggregate (128) + +- Exchange (127) + +- * HashAggregate (126) + +- * HashAggregate (125) + +- ReusedExchange (124) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#10)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] + +(9) Filter [codegen id : 6] +Input [2]: [ss_item_sk#9, ss_sold_date_sk#10] +Condition : isnotnull(ss_item_sk#9) + +(10) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(12) Filter [codegen id : 4] +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Condition : (((isnotnull(i_item_sk#11) AND isnotnull(i_brand_id#12)) AND isnotnull(i_class_id#13)) AND isnotnull(i_category_id#14)) + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#16)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] + +(15) Filter [codegen id : 3] +Input [2]: [cs_item_sk#15, cs_sold_date_sk#16] +Condition : isnotnull(cs_item_sk#15) + +(16) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(18) Filter [codegen id : 1] +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [4]: [i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#15] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 3] +Output [4]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20] +Input [6]: [cs_item_sk#15, cs_sold_date_sk#16, i_item_sk#17, i_brand_id#18, i_class_id#19, i_category_id#20] + +(22) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_year#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] + +(24) Filter [codegen id : 2] +Input [2]: [d_date_sk#21, d_year#22] +Condition : (((isnotnull(d_year#22) AND (d_year#22 >= 1999)) AND (d_year#22 <= 2001)) AND isnotnull(d_date_sk#21)) + +(25) Project [codegen id : 2] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#22] + +(26) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(27) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 3] +Output [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Input [5]: [cs_sold_date_sk#16, i_brand_id#18, i_class_id#19, i_category_id#20, d_date_sk#21] + +(29) BroadcastExchange +Input [3]: [i_brand_id#18, i_class_id#19, i_category_id#20] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=3] + +(30) BroadcastHashJoin [codegen id : 4] +Left keys [6]: [coalesce(i_brand_id#12, 0), isnull(i_brand_id#12), coalesce(i_class_id#13, 0), isnull(i_class_id#13), coalesce(i_category_id#14, 0), isnull(i_category_id#14)] +Right keys [6]: [coalesce(i_brand_id#18, 0), isnull(i_brand_id#18), coalesce(i_class_id#19, 0), isnull(i_class_id#19), coalesce(i_category_id#20, 0), isnull(i_category_id#20)] +Join type: LeftSemi +Join condition: None + +(31) BroadcastExchange +Input [4]: [i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#9] +Right keys [1]: [i_item_sk#11] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 6] +Output [4]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14] +Input [6]: [ss_item_sk#9, ss_sold_date_sk#10, i_item_sk#11, i_brand_id#12, i_class_id#13, i_category_id#14] + +(34) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#23] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#23] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [i_brand_id#12 AS brand_id#24, i_class_id#13 AS class_id#25, i_category_id#14 AS category_id#26] +Input [5]: [ss_sold_date_sk#10, i_brand_id#12, i_class_id#13, i_category_id#14, d_date_sk#23] + +(37) HashAggregate [codegen id : 6] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(38) Exchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: hashpartitioning(brand_id#24, class_id#25, category_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(39) HashAggregate [codegen id : 10] +Input [3]: [brand_id#24, class_id#25, category_id#26] +Keys [3]: [brand_id#24, class_id#25, category_id#26] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#24, class_id#25, category_id#26] + +(40) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#28)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] + +(42) Filter [codegen id : 9] +Input [2]: [ws_item_sk#27, ws_sold_date_sk#28] +Condition : isnotnull(ws_item_sk#27) + +(43) ReusedExchange [Reuses operator id: 19] +Output [4]: [i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_item_sk#27] +Right keys [1]: [i_item_sk#29] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 9] +Output [4]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32] +Input [6]: [ws_item_sk#27, ws_sold_date_sk#28, i_item_sk#29, i_brand_id#30, i_class_id#31, i_category_id#32] + +(46) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#33] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#33] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 9] +Output [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Input [5]: [ws_sold_date_sk#28, i_brand_id#30, i_class_id#31, i_category_id#32, d_date_sk#33] + +(49) BroadcastExchange +Input [3]: [i_brand_id#30, i_class_id#31, i_category_id#32] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=6] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [6]: [coalesce(brand_id#24, 0), isnull(brand_id#24), coalesce(class_id#25, 0), isnull(class_id#25), coalesce(category_id#26, 0), isnull(category_id#26)] +Right keys [6]: [coalesce(i_brand_id#30, 0), isnull(i_brand_id#30), coalesce(i_class_id#31, 0), isnull(i_class_id#31), coalesce(i_category_id#32, 0), isnull(i_category_id#32)] +Join type: LeftSemi +Join condition: None + +(51) BroadcastExchange +Input [3]: [brand_id#24, class_id#25, category_id#26] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=7] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#24, class_id#25, category_id#26] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#34] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#24, class_id#25, category_id#26] + +(54) BroadcastExchange +Input [1]: [ss_item_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] + +(55) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(56) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(58) Filter [codegen id : 23] +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Condition : isnotnull(i_item_sk#35) + +(59) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(60) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#35] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(61) BroadcastExchange +Input [4]: [i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(62) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#35] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 25] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#35, i_brand_id#36, i_class_id#37, i_category_id#38] + +(64) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#39, d_year#40, d_moy#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(66) Filter [codegen id : 24] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_year#40) AND isnotnull(d_moy#41)) AND (d_year#40 = 2000)) AND (d_moy#41 = 11)) AND isnotnull(d_date_sk#39)) + +(67) Project [codegen id : 24] +Output [1]: [d_date_sk#39] +Input [3]: [d_date_sk#39, d_year#40, d_moy#41] + +(68) BroadcastExchange +Input [1]: [d_date_sk#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#39] +Join type: Inner +Join condition: None + +(70) Project [codegen id : 25] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#36, i_class_id#37, i_category_id#38, d_date_sk#39] + +(71) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#36, i_class_id#37, i_category_id#38] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), partial_count(1)] +Aggregate Attributes [3]: [sum#42, isEmpty#43, count#44] +Results [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] + +(72) Exchange +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Arguments: hashpartitioning(i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(73) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#36, i_class_id#37, i_category_id#38, sum#45, isEmpty#46, count#47] +Keys [3]: [i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3)), count(1)] +Aggregate Attributes [2]: [sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48, count(1)#49] +Results [6]: [store AS channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum((cast(ss_quantity#2 as decimal(10,0)) * ss_list_price#3))#48 AS sales#51, count(1)#49 AS number_sales#52] + +(74) Filter [codegen id : 26] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sales#51, number_sales#52] +Condition : (isnotnull(sales#51) AND (cast(sales#51 as decimal(32,6)) > cast(Subquery scalar-subquery#53, [id=#54] as decimal(32,6)))) + +(75) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#58)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 51] +Input [4]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58] + +(77) Filter [codegen id : 51] +Input [4]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58] +Condition : isnotnull(cs_item_sk#55) + +(78) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(79) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#55] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(80) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] + +(81) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#55] +Right keys [1]: [i_item_sk#59] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 51] +Output [6]: [cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [8]: [cs_item_sk#55, cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] + +(83) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#63] + +(84) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#58] +Right keys [1]: [d_date_sk#63] +Join type: Inner +Join condition: None + +(85) Project [codegen id : 51] +Output [5]: [cs_quantity#56, cs_list_price#57, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [7]: [cs_quantity#56, cs_list_price#57, cs_sold_date_sk#58, i_brand_id#60, i_class_id#61, i_category_id#62, d_date_sk#63] + +(86) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#56, cs_list_price#57, i_brand_id#60, i_class_id#61, i_category_id#62] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [partial_sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57)), partial_count(1)] +Aggregate Attributes [3]: [sum#64, isEmpty#65, count#66] +Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] + +(87) Exchange +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(88) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57)), count(1)] +Aggregate Attributes [2]: [sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57))#70, count(1)#71] +Results [6]: [catalog AS channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sum((cast(cs_quantity#56 as decimal(10,0)) * cs_list_price#57))#70 AS sales#73, count(1)#71 AS number_sales#74] + +(89) Filter [codegen id : 52] +Input [6]: [channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Condition : (isnotnull(sales#73) AND (cast(sales#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#53, [id=#54] as decimal(32,6)))) + +(90) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#78)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(91) ColumnarToRow [codegen id : 77] +Input [4]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78] + +(92) Filter [codegen id : 77] +Input [4]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78] +Condition : isnotnull(ws_item_sk#75) + +(93) ReusedExchange [Reuses operator id: 54] +Output [1]: [ss_item_sk#34] + +(94) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#75] +Right keys [1]: [ss_item_sk#34] +Join type: LeftSemi +Join condition: None + +(95) ReusedExchange [Reuses operator id: 61] +Output [4]: [i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] + +(96) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#75] +Right keys [1]: [i_item_sk#79] +Join type: Inner +Join condition: None + +(97) Project [codegen id : 77] +Output [6]: [ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [8]: [ws_item_sk#75, ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78, i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] + +(98) ReusedExchange [Reuses operator id: 68] +Output [1]: [d_date_sk#83] + +(99) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#78] +Right keys [1]: [d_date_sk#83] +Join type: Inner +Join condition: None + +(100) Project [codegen id : 77] +Output [5]: [ws_quantity#76, ws_list_price#77, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [7]: [ws_quantity#76, ws_list_price#77, ws_sold_date_sk#78, i_brand_id#80, i_class_id#81, i_category_id#82, d_date_sk#83] + +(101) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#76, ws_list_price#77, i_brand_id#80, i_class_id#81, i_category_id#82] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [partial_sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77)), partial_count(1)] +Aggregate Attributes [3]: [sum#84, isEmpty#85, count#86] +Results [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#87, isEmpty#88, count#89] + +(102) Exchange +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#87, isEmpty#88, count#89] +Arguments: hashpartitioning(i_brand_id#80, i_class_id#81, i_category_id#82, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(103) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#87, isEmpty#88, count#89] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77)), count(1)] +Aggregate Attributes [2]: [sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77))#90, count(1)#91] +Results [6]: [web AS channel#92, i_brand_id#80, i_class_id#81, i_category_id#82, sum((cast(ws_quantity#76 as decimal(10,0)) * ws_list_price#77))#90 AS sales#93, count(1)#91 AS number_sales#94] + +(104) Filter [codegen id : 78] +Input [6]: [channel#92, i_brand_id#80, i_class_id#81, i_category_id#82, sales#93, number_sales#94] +Condition : (isnotnull(sales#93) AND (cast(sales#93 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#53, [id=#54] as decimal(32,6)))) + +(105) Union + +(106) HashAggregate [codegen id : 79] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sales#51, number_sales#52] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [partial_sum(sales#51), partial_sum(number_sales#52)] +Aggregate Attributes [3]: [sum#95, isEmpty#96, sum#97] +Results [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(107) Exchange +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Arguments: hashpartitioning(channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(108) HashAggregate [codegen id : 80] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(109) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(110) HashAggregate [codegen id : 160] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [5]: [channel#50, i_brand_id#36, i_class_id#37, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(111) HashAggregate [codegen id : 160] +Input [5]: [channel#50, i_brand_id#36, i_class_id#37, sum_sales#103, number_sales#104] +Keys [3]: [channel#50, i_brand_id#36, i_class_id#37] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#105, isEmpty#106, sum#107] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, sum#108, isEmpty#109, sum#110] + +(112) Exchange +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, sum#108, isEmpty#109, sum#110] +Arguments: hashpartitioning(channel#50, i_brand_id#36, i_class_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(113) HashAggregate [codegen id : 161] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, sum#108, isEmpty#109, sum#110] +Keys [3]: [channel#50, i_brand_id#36, i_class_id#37] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#111, sum(number_sales#104)#112] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, null AS i_category_id#113, sum(sum_sales#103)#111 AS sum(sum_sales)#114, sum(number_sales#104)#112 AS sum(number_sales)#115] + +(114) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(115) HashAggregate [codegen id : 241] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [4]: [channel#50, i_brand_id#36, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(116) HashAggregate [codegen id : 241] +Input [4]: [channel#50, i_brand_id#36, sum_sales#103, number_sales#104] +Keys [2]: [channel#50, i_brand_id#36] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#116, isEmpty#117, sum#118] +Results [5]: [channel#50, i_brand_id#36, sum#119, isEmpty#120, sum#121] + +(117) Exchange +Input [5]: [channel#50, i_brand_id#36, sum#119, isEmpty#120, sum#121] +Arguments: hashpartitioning(channel#50, i_brand_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(118) HashAggregate [codegen id : 242] +Input [5]: [channel#50, i_brand_id#36, sum#119, isEmpty#120, sum#121] +Keys [2]: [channel#50, i_brand_id#36] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#122, sum(number_sales#104)#123] +Results [6]: [channel#50, i_brand_id#36, null AS i_class_id#124, null AS i_category_id#125, sum(sum_sales#103)#122 AS sum(sum_sales)#126, sum(number_sales#104)#123 AS sum(number_sales)#127] + +(119) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(120) HashAggregate [codegen id : 322] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [3]: [channel#50, sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(121) HashAggregate [codegen id : 322] +Input [3]: [channel#50, sum_sales#103, number_sales#104] +Keys [1]: [channel#50] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#128, isEmpty#129, sum#130] +Results [4]: [channel#50, sum#131, isEmpty#132, sum#133] + +(122) Exchange +Input [4]: [channel#50, sum#131, isEmpty#132, sum#133] +Arguments: hashpartitioning(channel#50, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(123) HashAggregate [codegen id : 323] +Input [4]: [channel#50, sum#131, isEmpty#132, sum#133] +Keys [1]: [channel#50] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#134, sum(number_sales#104)#135] +Results [6]: [channel#50, null AS i_brand_id#136, null AS i_class_id#137, null AS i_category_id#138, sum(sum_sales#103)#134 AS sum(sum_sales)#139, sum(number_sales#104)#135 AS sum(number_sales)#140] + +(124) ReusedExchange [Reuses operator id: 107] +Output [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] + +(125) HashAggregate [codegen id : 403] +Input [7]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum#98, isEmpty#99, sum#100] +Keys [4]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38] +Functions [2]: [sum(sales#51), sum(number_sales#52)] +Aggregate Attributes [2]: [sum(sales#51)#101, sum(number_sales#52)#102] +Results [2]: [sum(sales#51)#101 AS sum_sales#103, sum(number_sales#52)#102 AS number_sales#104] + +(126) HashAggregate [codegen id : 403] +Input [2]: [sum_sales#103, number_sales#104] +Keys: [] +Functions [2]: [partial_sum(sum_sales#103), partial_sum(number_sales#104)] +Aggregate Attributes [3]: [sum#141, isEmpty#142, sum#143] +Results [3]: [sum#144, isEmpty#145, sum#146] + +(127) Exchange +Input [3]: [sum#144, isEmpty#145, sum#146] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] + +(128) HashAggregate [codegen id : 404] +Input [3]: [sum#144, isEmpty#145, sum#146] +Keys: [] +Functions [2]: [sum(sum_sales#103), sum(number_sales#104)] +Aggregate Attributes [2]: [sum(sum_sales#103)#147, sum(number_sales#104)#148] +Results [6]: [null AS channel#149, null AS i_brand_id#150, null AS i_class_id#151, null AS i_category_id#152, sum(sum_sales#103)#147 AS sum(sum_sales)#153, sum(number_sales#104)#148 AS sum(number_sales)#154] + +(129) Union + +(130) HashAggregate [codegen id : 405] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Keys [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] + +(131) Exchange +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Arguments: hashpartitioning(channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(132) HashAggregate [codegen id : 406] +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Keys [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Functions: [] +Aggregate Attributes: [] +Results [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] + +(133) TakeOrderedAndProject +Input [6]: [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] +Arguments: 100, [channel#50 ASC NULLS FIRST, i_brand_id#36 ASC NULLS FIRST, i_class_id#37 ASC NULLS FIRST, i_category_id#38 ASC NULLS FIRST], [channel#50, i_brand_id#36, i_class_id#37, i_category_id#38, sum_sales#103, number_sales#104] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 74 Hosting Expression = Subquery scalar-subquery#53, [id=#54] +* HashAggregate (156) ++- Exchange (155) + +- * HashAggregate (154) + +- Union (153) + :- * Project (138) + : +- * BroadcastHashJoin Inner BuildRight (137) + : :- * ColumnarToRow (135) + : : +- Scan parquet spark_catalog.default.store_sales (134) + : +- ReusedExchange (136) + :- * Project (147) + : +- * BroadcastHashJoin Inner BuildRight (146) + : :- * ColumnarToRow (140) + : : +- Scan parquet spark_catalog.default.catalog_sales (139) + : +- BroadcastExchange (145) + : +- * Project (144) + : +- * Filter (143) + : +- * ColumnarToRow (142) + : +- Scan parquet spark_catalog.default.date_dim (141) + +- * Project (152) + +- * BroadcastHashJoin Inner BuildRight (151) + :- * ColumnarToRow (149) + : +- Scan parquet spark_catalog.default.web_sales (148) + +- ReusedExchange (150) + + +(134) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_quantity#155, ss_list_price#156, ss_sold_date_sk#157] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#157)] +ReadSchema: struct + +(135) ColumnarToRow [codegen id : 2] +Input [3]: [ss_quantity#155, ss_list_price#156, ss_sold_date_sk#157] + +(136) ReusedExchange [Reuses operator id: 26] +Output [1]: [d_date_sk#158] + +(137) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#157] +Right keys [1]: [d_date_sk#158] +Join type: Inner +Join condition: None + +(138) Project [codegen id : 2] +Output [2]: [ss_quantity#155 AS quantity#159, ss_list_price#156 AS list_price#160] +Input [4]: [ss_quantity#155, ss_list_price#156, ss_sold_date_sk#157, d_date_sk#158] + +(139) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_quantity#161, cs_list_price#162, cs_sold_date_sk#163] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#163)] +ReadSchema: struct + +(140) ColumnarToRow [codegen id : 4] +Input [3]: [cs_quantity#161, cs_list_price#162, cs_sold_date_sk#163] + +(141) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#164, d_year#165] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(142) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#164, d_year#165] + +(143) Filter [codegen id : 3] +Input [2]: [d_date_sk#164, d_year#165] +Condition : (((isnotnull(d_year#165) AND (d_year#165 >= 1998)) AND (d_year#165 <= 2000)) AND isnotnull(d_date_sk#164)) + +(144) Project [codegen id : 3] +Output [1]: [d_date_sk#164] +Input [2]: [d_date_sk#164, d_year#165] + +(145) BroadcastExchange +Input [1]: [d_date_sk#164] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] + +(146) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#163] +Right keys [1]: [d_date_sk#164] +Join type: Inner +Join condition: None + +(147) Project [codegen id : 4] +Output [2]: [cs_quantity#161 AS quantity#166, cs_list_price#162 AS list_price#167] +Input [4]: [cs_quantity#161, cs_list_price#162, cs_sold_date_sk#163, d_date_sk#164] + +(148) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_quantity#168, ws_list_price#169, ws_sold_date_sk#170] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#170)] +ReadSchema: struct + +(149) ColumnarToRow [codegen id : 6] +Input [3]: [ws_quantity#168, ws_list_price#169, ws_sold_date_sk#170] + +(150) ReusedExchange [Reuses operator id: 145] +Output [1]: [d_date_sk#171] + +(151) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#170] +Right keys [1]: [d_date_sk#171] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 6] +Output [2]: [ws_quantity#168 AS quantity#172, ws_list_price#169 AS list_price#173] +Input [4]: [ws_quantity#168, ws_list_price#169, ws_sold_date_sk#170, d_date_sk#171] + +(153) Union + +(154) HashAggregate [codegen id : 7] +Input [2]: [quantity#159, list_price#160] +Keys: [] +Functions [1]: [partial_avg((cast(quantity#159 as decimal(10,0)) * list_price#160))] +Aggregate Attributes [2]: [sum#174, count#175] +Results [2]: [sum#176, count#177] + +(155) Exchange +Input [2]: [sum#176, count#177] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=21] + +(156) HashAggregate [codegen id : 8] +Input [2]: [sum#176, count#177] +Keys: [] +Functions [1]: [avg((cast(quantity#159 as decimal(10,0)) * list_price#160))] +Aggregate Attributes [1]: [avg((cast(quantity#159 as decimal(10,0)) * list_price#160))#178] +Results [1]: [avg((cast(quantity#159 as decimal(10,0)) * list_price#160))#178 AS average_sales#179] + +Subquery:2 Hosting operator id = 89 Hosting Expression = ReusedSubquery Subquery scalar-subquery#53, [id=#54] + +Subquery:3 Hosting operator id = 104 Hosting Expression = ReusedSubquery Subquery scalar-subquery#53, [id=#54] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..be14bd5141 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.native_iceberg_compat/simplified.txt @@ -0,0 +1,249 @@ +TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (406) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 + WholeStageCodegen (405) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + InputAdapter + Union + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (26) + Filter [sales] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg((cast(quantity as decimal(10,0)) * list_price)),average_sales,sum,count] + InputAdapter + Exchange #14 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #10 + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #15 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ss_quantity as decimal(10,0)) * ss_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #3 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_quantity,ss_list_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (10) + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (6) + HashAggregate [brand_id,class_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_sold_date_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] #10 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(cs_quantity as decimal(10,0)) * cs_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_quantity,cs_list_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (78) + Filter [sales] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum((cast(ws_quantity as decimal(10,0)) * ws_list_price)),count(1),channel,sales,number_sales,sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #17 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_quantity,ws_list_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (161) + HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #18 + WholeStageCodegen (160) + HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + WholeStageCodegen (242) + HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id] #19 + WholeStageCodegen (241) + HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + WholeStageCodegen (323) + HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel] #20 + WholeStageCodegen (322) + HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 + WholeStageCodegen (404) + HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange #21 + WholeStageCodegen (403) + HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + ReusedExchange [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/explain.txt new file mode 100644 index 0000000000..e17c1e724e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/explain.txt @@ -0,0 +1,891 @@ +== Physical Plan == +TakeOrderedAndProject (157) ++- Union (156) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet spark_catalog.default.customer (11) + : : : : +- BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet spark_catalog.default.customer_demographics (18) + : : : +- BroadcastExchange (27) + : : : +- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet spark_catalog.default.customer_address (24) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.date_dim (30) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet spark_catalog.default.item (37) + :- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (66) + : : +- * BroadcastHashJoin Inner BuildRight (65) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * Project (57) + : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : :- * Project (54) + : : : : : +- * BroadcastHashJoin Inner BuildRight (53) + : : : : : :- * Project (51) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : : : :- * Filter (48) + : : : : : : : +- * ColumnarToRow (47) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (46) + : : : : : : +- ReusedExchange (49) + : : : : : +- ReusedExchange (52) + : : : : +- ReusedExchange (55) + : : : +- BroadcastExchange (61) + : : : +- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet spark_catalog.default.customer_address (58) + : : +- ReusedExchange (64) + : +- ReusedExchange (67) + :- * HashAggregate (100) + : +- Exchange (99) + : +- * HashAggregate (98) + : +- * Project (97) + : +- * BroadcastHashJoin Inner BuildRight (96) + : :- * Project (94) + : : +- * BroadcastHashJoin Inner BuildRight (93) + : : :- * Project (91) + : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : :- * Project (84) + : : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : : :- * Project (81) + : : : : : +- * BroadcastHashJoin Inner BuildRight (80) + : : : : : :- * Project (78) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : : : : :- * Filter (75) + : : : : : : : +- * ColumnarToRow (74) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (73) + : : : : : : +- ReusedExchange (76) + : : : : : +- ReusedExchange (79) + : : : : +- ReusedExchange (82) + : : : +- BroadcastExchange (89) + : : : +- * Project (88) + : : : +- * Filter (87) + : : : +- * ColumnarToRow (86) + : : : +- Scan parquet spark_catalog.default.customer_address (85) + : : +- ReusedExchange (92) + : +- ReusedExchange (95) + :- * HashAggregate (128) + : +- Exchange (127) + : +- * HashAggregate (126) + : +- * Project (125) + : +- * BroadcastHashJoin Inner BuildRight (124) + : :- * Project (122) + : : +- * BroadcastHashJoin Inner BuildRight (121) + : : :- * Project (119) + : : : +- * BroadcastHashJoin Inner BuildRight (118) + : : : :- * Project (112) + : : : : +- * BroadcastHashJoin Inner BuildRight (111) + : : : : :- * Project (109) + : : : : : +- * BroadcastHashJoin Inner BuildRight (108) + : : : : : :- * Project (106) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (105) + : : : : : : :- * Filter (103) + : : : : : : : +- * ColumnarToRow (102) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (101) + : : : : : : +- ReusedExchange (104) + : : : : : +- ReusedExchange (107) + : : : : +- ReusedExchange (110) + : : : +- BroadcastExchange (117) + : : : +- * Project (116) + : : : +- * Filter (115) + : : : +- * ColumnarToRow (114) + : : : +- Scan parquet spark_catalog.default.customer_address (113) + : : +- ReusedExchange (120) + : +- ReusedExchange (123) + +- * HashAggregate (155) + +- Exchange (154) + +- * HashAggregate (153) + +- * Project (152) + +- * BroadcastHashJoin Inner BuildRight (151) + :- * Project (146) + : +- * BroadcastHashJoin Inner BuildRight (145) + : :- * Project (143) + : : +- * BroadcastHashJoin Inner BuildRight (142) + : : :- * Project (140) + : : : +- * BroadcastHashJoin Inner BuildRight (139) + : : : :- * Project (137) + : : : : +- * BroadcastHashJoin Inner BuildRight (136) + : : : : :- * Project (134) + : : : : : +- * BroadcastHashJoin Inner BuildRight (133) + : : : : : :- * Filter (131) + : : : : : : +- * ColumnarToRow (130) + : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (129) + : : : : : +- ReusedExchange (132) + : : : : +- ReusedExchange (135) + : : : +- ReusedExchange (138) + : : +- ReusedExchange (141) + : +- ReusedExchange (144) + +- BroadcastExchange (150) + +- * Filter (149) + +- * ColumnarToRow (148) + +- Scan parquet spark_catalog.default.item (147) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = M)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(18) Scan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(21) BroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(24) Scan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(30) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#24, d_year#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#24)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#25] + +(34) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] + +(37) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#26, i_item_id#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(40) BroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] + +(43) HashAggregate [codegen id : 7] +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Results [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] + +(44) Exchange +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 8] +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#63, avg(agg2#29)#64, avg(agg3#30)#65, avg(agg4#31)#66, avg(agg5#32)#67, avg(agg6#33)#68, avg(agg7#34)#69] +Results [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, avg(agg1#28)#63 AS agg1#70, avg(agg2#29)#64 AS agg2#71, avg(agg3#30)#65 AS agg3#72, avg(agg4#31)#66 AS agg4#73, avg(agg5#32)#67 AS agg5#74, avg(agg6#33)#68 AS agg6#75, avg(agg7#34)#69 AS agg7#76] + +(46) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 15] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(48) Filter [codegen id : 15] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(49) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(50) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 15] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(52) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(53) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 15] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(55) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(56) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 15] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(58) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 12] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] + +(60) Filter [codegen id : 12] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(61) BroadcastExchange +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(62) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 15] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_state#22, ca_country#23] + +(64) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(65) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 15] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23, d_date_sk#24] + +(67) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#26, i_item_id#27] + +(68) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 15] +Output [10]: [i_item_id#27, ca_country#23, ca_state#22, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] + +(70) HashAggregate [codegen id : 15] +Input [10]: [i_item_id#27, ca_country#23, ca_state#22, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [3]: [i_item_id#27, ca_country#23, ca_state#22] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#77, count#78, sum#79, count#80, sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88, sum#89, count#90] +Results [17]: [i_item_id#27, ca_country#23, ca_state#22, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] + +(71) Exchange +Input [17]: [i_item_id#27, ca_country#23, ca_state#22, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, ca_state#22, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(72) HashAggregate [codegen id : 16] +Input [17]: [i_item_id#27, ca_country#23, ca_state#22, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Keys [3]: [i_item_id#27, ca_country#23, ca_state#22] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#105, avg(agg2#29)#106, avg(agg3#30)#107, avg(agg4#31)#108, avg(agg5#32)#109, avg(agg6#33)#110, avg(agg7#34)#111] +Results [11]: [i_item_id#27, ca_country#23, ca_state#22, null AS county#112, avg(agg1#28)#105 AS agg1#113, avg(agg2#29)#106 AS agg2#114, avg(agg3#30)#107 AS agg3#115, avg(agg4#31)#108 AS agg4#116, avg(agg5#32)#109 AS agg5#117, avg(agg6#33)#110 AS agg6#118, avg(agg7#34)#111 AS agg7#119] + +(73) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 23] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(75) Filter [codegen id : 23] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(76) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(77) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(78) Project [codegen id : 23] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(79) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(80) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(81) Project [codegen id : 23] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(82) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(83) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(84) Project [codegen id : 23] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(85) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(86) ColumnarToRow [codegen id : 20] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] + +(87) Filter [codegen id : 20] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(88) Project [codegen id : 20] +Output [2]: [ca_address_sk#20, ca_country#23] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] + +(89) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(90) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 23] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_country#23] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_country#23] + +(92) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(93) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(94) Project [codegen id : 23] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_country#23] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_country#23, d_date_sk#24] + +(95) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#26, i_item_id#27] + +(96) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(97) Project [codegen id : 23] +Output [9]: [i_item_id#27, ca_country#23, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_country#23, i_item_sk#26, i_item_id#27] + +(98) HashAggregate [codegen id : 23] +Input [9]: [i_item_id#27, ca_country#23, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [2]: [i_item_id#27, ca_country#23] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#120, count#121, sum#122, count#123, sum#124, count#125, sum#126, count#127, sum#128, count#129, sum#130, count#131, sum#132, count#133] +Results [16]: [i_item_id#27, ca_country#23, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] + +(99) Exchange +Input [16]: [i_item_id#27, ca_country#23, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(100) HashAggregate [codegen id : 24] +Input [16]: [i_item_id#27, ca_country#23, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Keys [2]: [i_item_id#27, ca_country#23] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#148, avg(agg2#29)#149, avg(agg3#30)#150, avg(agg4#31)#151, avg(agg5#32)#152, avg(agg6#33)#153, avg(agg7#34)#154] +Results [11]: [i_item_id#27, ca_country#23, null AS ca_state#155, null AS county#156, avg(agg1#28)#148 AS agg1#157, avg(agg2#29)#149 AS agg2#158, avg(agg3#30)#150 AS agg3#159, avg(agg4#31)#151 AS agg4#160, avg(agg5#32)#152 AS agg5#161, avg(agg6#33)#153 AS agg6#162, avg(agg7#34)#154 AS agg7#163] + +(101) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(102) ColumnarToRow [codegen id : 31] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(103) Filter [codegen id : 31] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(104) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(105) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(106) Project [codegen id : 31] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(107) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(108) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(109) Project [codegen id : 31] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(110) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(111) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(112) Project [codegen id : 31] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(113) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#20, ca_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(114) ColumnarToRow [codegen id : 28] +Input [2]: [ca_address_sk#20, ca_state#22] + +(115) Filter [codegen id : 28] +Input [2]: [ca_address_sk#20, ca_state#22] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(116) Project [codegen id : 28] +Output [1]: [ca_address_sk#20] +Input [2]: [ca_address_sk#20, ca_state#22] + +(117) BroadcastExchange +Input [1]: [ca_address_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(118) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(119) Project [codegen id : 31] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20] + +(120) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(121) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(122) Project [codegen id : 31] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, d_date_sk#24] + +(123) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#26, i_item_id#27] + +(124) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(125) Project [codegen id : 31] +Output [8]: [i_item_id#27, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_sk#26, i_item_id#27] + +(126) HashAggregate [codegen id : 31] +Input [8]: [i_item_id#27, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [1]: [i_item_id#27] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#164, count#165, sum#166, count#167, sum#168, count#169, sum#170, count#171, sum#172, count#173, sum#174, count#175, sum#176, count#177] +Results [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] + +(127) Exchange +Input [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Arguments: hashpartitioning(i_item_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(128) HashAggregate [codegen id : 32] +Input [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Keys [1]: [i_item_id#27] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#192, avg(agg2#29)#193, avg(agg3#30)#194, avg(agg4#31)#195, avg(agg5#32)#196, avg(agg6#33)#197, avg(agg7#34)#198] +Results [11]: [i_item_id#27, null AS ca_country#199, null AS ca_state#200, null AS county#201, avg(agg1#28)#192 AS agg1#202, avg(agg2#29)#193 AS agg2#203, avg(agg3#30)#194 AS agg3#204, avg(agg4#31)#195 AS agg4#205, avg(agg5#32)#196 AS agg5#206, avg(agg6#33)#197 AS agg6#207, avg(agg7#34)#198 AS agg7#208] + +(129) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(130) ColumnarToRow [codegen id : 39] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(131) Filter [codegen id : 39] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(132) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(133) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(134) Project [codegen id : 39] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(135) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(136) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(137) Project [codegen id : 39] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(138) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(139) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(140) Project [codegen id : 39] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(141) ReusedExchange [Reuses operator id: 117] +Output [1]: [ca_address_sk#20] + +(142) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(143) Project [codegen id : 39] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20] + +(144) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(145) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(146) Project [codegen id : 39] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, d_date_sk#24] + +(147) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(148) ColumnarToRow [codegen id : 38] +Input [1]: [i_item_sk#26] + +(149) Filter [codegen id : 38] +Input [1]: [i_item_sk#26] +Condition : isnotnull(i_item_sk#26) + +(150) BroadcastExchange +Input [1]: [i_item_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(151) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 39] +Output [7]: [cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_sk#26] + +(153) HashAggregate [codegen id : 39] +Input [7]: [agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys: [] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216, sum#217, count#218, sum#219, count#220, sum#221, count#222] +Results [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] + +(154) Exchange +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(155) HashAggregate [codegen id : 40] +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] +Keys: [] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#237, avg(agg2#29)#238, avg(agg3#30)#239, avg(agg4#31)#240, avg(agg5#32)#241, avg(agg6#33)#242, avg(agg7#34)#243] +Results [11]: [null AS i_item_id#244, null AS ca_country#245, null AS ca_state#246, null AS county#247, avg(agg1#28)#237 AS agg1#248, avg(agg2#29)#238 AS agg2#249, avg(agg3#30)#239 AS agg3#250, avg(agg4#31)#240 AS agg4#251, avg(agg5#32)#241 AS agg5#252, avg(agg6#33)#242 AS agg6#253, avg(agg7#34)#243 AS agg7#254] + +(156) Union + +(157) TakeOrderedAndProject +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] +Arguments: 100, [ca_country#23 ASC NULLS FIRST, ca_state#22 ASC NULLS FIRST, ca_county#21 ASC NULLS FIRST, i_item_id#27 ASC NULLS FIRST], [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..fa297f1803 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_datafusion/simplified.txt @@ -0,0 +1,227 @@ +TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + Union + WholeStageCodegen (8) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state,ca_county] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ca_country,ca_state,ca_county,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_gender,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + WholeStageCodegen (16) + HashAggregate [i_item_id,ca_country,ca_state,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state] #8 + WholeStageCodegen (15) + HashAggregate [i_item_id,ca_country,ca_state,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ca_country,ca_state,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_state,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_state,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (12) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #7 + WholeStageCodegen (24) + HashAggregate [i_item_id,ca_country,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country] #10 + WholeStageCodegen (23) + HashAggregate [i_item_id,ca_country,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ca_country,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (20) + Project [ca_address_sk,ca_country] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #7 + WholeStageCodegen (32) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #12 + WholeStageCodegen (31) + HashAggregate [i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (28) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #7 + WholeStageCodegen (40) + HashAggregate [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange #14 + WholeStageCodegen (39) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + ReusedExchange [ca_address_sk] #13 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (38) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e17c1e724e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/explain.txt @@ -0,0 +1,891 @@ +== Physical Plan == +TakeOrderedAndProject (157) ++- Union (156) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet spark_catalog.default.customer (11) + : : : : +- BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet spark_catalog.default.customer_demographics (18) + : : : +- BroadcastExchange (27) + : : : +- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet spark_catalog.default.customer_address (24) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.date_dim (30) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet spark_catalog.default.item (37) + :- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (66) + : : +- * BroadcastHashJoin Inner BuildRight (65) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * Project (57) + : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : :- * Project (54) + : : : : : +- * BroadcastHashJoin Inner BuildRight (53) + : : : : : :- * Project (51) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : : : :- * Filter (48) + : : : : : : : +- * ColumnarToRow (47) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (46) + : : : : : : +- ReusedExchange (49) + : : : : : +- ReusedExchange (52) + : : : : +- ReusedExchange (55) + : : : +- BroadcastExchange (61) + : : : +- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet spark_catalog.default.customer_address (58) + : : +- ReusedExchange (64) + : +- ReusedExchange (67) + :- * HashAggregate (100) + : +- Exchange (99) + : +- * HashAggregate (98) + : +- * Project (97) + : +- * BroadcastHashJoin Inner BuildRight (96) + : :- * Project (94) + : : +- * BroadcastHashJoin Inner BuildRight (93) + : : :- * Project (91) + : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : :- * Project (84) + : : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : : :- * Project (81) + : : : : : +- * BroadcastHashJoin Inner BuildRight (80) + : : : : : :- * Project (78) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : : : : :- * Filter (75) + : : : : : : : +- * ColumnarToRow (74) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (73) + : : : : : : +- ReusedExchange (76) + : : : : : +- ReusedExchange (79) + : : : : +- ReusedExchange (82) + : : : +- BroadcastExchange (89) + : : : +- * Project (88) + : : : +- * Filter (87) + : : : +- * ColumnarToRow (86) + : : : +- Scan parquet spark_catalog.default.customer_address (85) + : : +- ReusedExchange (92) + : +- ReusedExchange (95) + :- * HashAggregate (128) + : +- Exchange (127) + : +- * HashAggregate (126) + : +- * Project (125) + : +- * BroadcastHashJoin Inner BuildRight (124) + : :- * Project (122) + : : +- * BroadcastHashJoin Inner BuildRight (121) + : : :- * Project (119) + : : : +- * BroadcastHashJoin Inner BuildRight (118) + : : : :- * Project (112) + : : : : +- * BroadcastHashJoin Inner BuildRight (111) + : : : : :- * Project (109) + : : : : : +- * BroadcastHashJoin Inner BuildRight (108) + : : : : : :- * Project (106) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (105) + : : : : : : :- * Filter (103) + : : : : : : : +- * ColumnarToRow (102) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (101) + : : : : : : +- ReusedExchange (104) + : : : : : +- ReusedExchange (107) + : : : : +- ReusedExchange (110) + : : : +- BroadcastExchange (117) + : : : +- * Project (116) + : : : +- * Filter (115) + : : : +- * ColumnarToRow (114) + : : : +- Scan parquet spark_catalog.default.customer_address (113) + : : +- ReusedExchange (120) + : +- ReusedExchange (123) + +- * HashAggregate (155) + +- Exchange (154) + +- * HashAggregate (153) + +- * Project (152) + +- * BroadcastHashJoin Inner BuildRight (151) + :- * Project (146) + : +- * BroadcastHashJoin Inner BuildRight (145) + : :- * Project (143) + : : +- * BroadcastHashJoin Inner BuildRight (142) + : : :- * Project (140) + : : : +- * BroadcastHashJoin Inner BuildRight (139) + : : : :- * Project (137) + : : : : +- * BroadcastHashJoin Inner BuildRight (136) + : : : : :- * Project (134) + : : : : : +- * BroadcastHashJoin Inner BuildRight (133) + : : : : : :- * Filter (131) + : : : : : : +- * ColumnarToRow (130) + : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (129) + : : : : : +- ReusedExchange (132) + : : : : +- ReusedExchange (135) + : : : +- ReusedExchange (138) + : : +- ReusedExchange (141) + : +- ReusedExchange (144) + +- BroadcastExchange (150) + +- * Filter (149) + +- * ColumnarToRow (148) + +- Scan parquet spark_catalog.default.item (147) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_education_status,College ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = M)) AND (cd_education_status#12 = College )) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] +Condition : (((c_birth_month#17 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#14)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_addr_sk#16)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [5]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_month#17, c_birth_year#18] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(18) Scan parquet spark_catalog.default.customer_demographics +Output [1]: [cd_demo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#19] +Condition : isnotnull(cd_demo_sk#19) + +(21) BroadcastExchange +Input [1]: [cd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(24) Scan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_county#21, ca_state#22, ca_country#23] + +(30) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#24, d_year#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#24, d_year#25] +Condition : ((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#24)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#25] + +(34) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, d_date_sk#24] + +(37) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#26, i_item_id#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) + +(40) BroadcastExchange +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_county#21, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] + +(43) HashAggregate [codegen id : 7] +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Results [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] + +(44) Exchange +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, ca_state#22, ca_county#21, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 8] +Input [18]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Keys [4]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#63, avg(agg2#29)#64, avg(agg3#30)#65, avg(agg4#31)#66, avg(agg5#32)#67, avg(agg6#33)#68, avg(agg7#34)#69] +Results [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, avg(agg1#28)#63 AS agg1#70, avg(agg2#29)#64 AS agg2#71, avg(agg3#30)#65 AS agg3#72, avg(agg4#31)#66 AS agg4#73, avg(agg5#32)#67 AS agg5#74, avg(agg6#33)#68 AS agg6#75, avg(agg7#34)#69 AS agg7#76] + +(46) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 15] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(48) Filter [codegen id : 15] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(49) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(50) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 15] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(52) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(53) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 15] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(55) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(56) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 15] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(58) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 12] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] + +(60) Filter [codegen id : 12] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(61) BroadcastExchange +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(62) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 15] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_state#22, ca_country#23] + +(64) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(65) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 15] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23, d_date_sk#24] + +(67) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#26, i_item_id#27] + +(68) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 15] +Output [10]: [i_item_id#27, ca_country#23, ca_state#22, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_state#22, ca_country#23, i_item_sk#26, i_item_id#27] + +(70) HashAggregate [codegen id : 15] +Input [10]: [i_item_id#27, ca_country#23, ca_state#22, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [3]: [i_item_id#27, ca_country#23, ca_state#22] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#77, count#78, sum#79, count#80, sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88, sum#89, count#90] +Results [17]: [i_item_id#27, ca_country#23, ca_state#22, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] + +(71) Exchange +Input [17]: [i_item_id#27, ca_country#23, ca_state#22, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, ca_state#22, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(72) HashAggregate [codegen id : 16] +Input [17]: [i_item_id#27, ca_country#23, ca_state#22, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Keys [3]: [i_item_id#27, ca_country#23, ca_state#22] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#105, avg(agg2#29)#106, avg(agg3#30)#107, avg(agg4#31)#108, avg(agg5#32)#109, avg(agg6#33)#110, avg(agg7#34)#111] +Results [11]: [i_item_id#27, ca_country#23, ca_state#22, null AS county#112, avg(agg1#28)#105 AS agg1#113, avg(agg2#29)#106 AS agg2#114, avg(agg3#30)#107 AS agg3#115, avg(agg4#31)#108 AS agg4#116, avg(agg5#32)#109 AS agg5#117, avg(agg6#33)#110 AS agg6#118, avg(agg7#34)#111 AS agg7#119] + +(73) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(74) ColumnarToRow [codegen id : 23] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(75) Filter [codegen id : 23] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(76) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(77) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(78) Project [codegen id : 23] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(79) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(80) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(81) Project [codegen id : 23] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(82) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(83) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(84) Project [codegen id : 23] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(85) Scan parquet spark_catalog.default.customer_address +Output [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(86) ColumnarToRow [codegen id : 20] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] + +(87) Filter [codegen id : 20] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(88) Project [codegen id : 20] +Output [2]: [ca_address_sk#20, ca_country#23] +Input [3]: [ca_address_sk#20, ca_state#22, ca_country#23] + +(89) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_country#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] + +(90) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 23] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_country#23] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20, ca_country#23] + +(92) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(93) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(94) Project [codegen id : 23] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_country#23] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, ca_country#23, d_date_sk#24] + +(95) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#26, i_item_id#27] + +(96) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(97) Project [codegen id : 23] +Output [9]: [i_item_id#27, ca_country#23, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, ca_country#23, i_item_sk#26, i_item_id#27] + +(98) HashAggregate [codegen id : 23] +Input [9]: [i_item_id#27, ca_country#23, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [2]: [i_item_id#27, ca_country#23] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#120, count#121, sum#122, count#123, sum#124, count#125, sum#126, count#127, sum#128, count#129, sum#130, count#131, sum#132, count#133] +Results [16]: [i_item_id#27, ca_country#23, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] + +(99) Exchange +Input [16]: [i_item_id#27, ca_country#23, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Arguments: hashpartitioning(i_item_id#27, ca_country#23, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(100) HashAggregate [codegen id : 24] +Input [16]: [i_item_id#27, ca_country#23, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Keys [2]: [i_item_id#27, ca_country#23] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#148, avg(agg2#29)#149, avg(agg3#30)#150, avg(agg4#31)#151, avg(agg5#32)#152, avg(agg6#33)#153, avg(agg7#34)#154] +Results [11]: [i_item_id#27, ca_country#23, null AS ca_state#155, null AS county#156, avg(agg1#28)#148 AS agg1#157, avg(agg2#29)#149 AS agg2#158, avg(agg3#30)#150 AS agg3#159, avg(agg4#31)#151 AS agg4#160, avg(agg5#32)#152 AS agg5#161, avg(agg6#33)#153 AS agg6#162, avg(agg7#34)#154 AS agg7#163] + +(101) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(102) ColumnarToRow [codegen id : 31] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(103) Filter [codegen id : 31] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(104) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(105) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(106) Project [codegen id : 31] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(107) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(108) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(109) Project [codegen id : 31] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(110) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(111) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(112) Project [codegen id : 31] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(113) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#20, ca_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(114) ColumnarToRow [codegen id : 28] +Input [2]: [ca_address_sk#20, ca_state#22] + +(115) Filter [codegen id : 28] +Input [2]: [ca_address_sk#20, ca_state#22] +Condition : (ca_state#22 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#20)) + +(116) Project [codegen id : 28] +Output [1]: [ca_address_sk#20] +Input [2]: [ca_address_sk#20, ca_state#22] + +(117) BroadcastExchange +Input [1]: [ca_address_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(118) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(119) Project [codegen id : 31] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20] + +(120) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(121) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(122) Project [codegen id : 31] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, d_date_sk#24] + +(123) ReusedExchange [Reuses operator id: 40] +Output [2]: [i_item_sk#26, i_item_id#27] + +(124) BroadcastHashJoin [codegen id : 31] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(125) Project [codegen id : 31] +Output [8]: [i_item_id#27, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_sk#26, i_item_id#27] + +(126) HashAggregate [codegen id : 31] +Input [8]: [i_item_id#27, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [1]: [i_item_id#27] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#164, count#165, sum#166, count#167, sum#168, count#169, sum#170, count#171, sum#172, count#173, sum#174, count#175, sum#176, count#177] +Results [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] + +(127) Exchange +Input [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Arguments: hashpartitioning(i_item_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(128) HashAggregate [codegen id : 32] +Input [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Keys [1]: [i_item_id#27] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#192, avg(agg2#29)#193, avg(agg3#30)#194, avg(agg4#31)#195, avg(agg5#32)#196, avg(agg6#33)#197, avg(agg7#34)#198] +Results [11]: [i_item_id#27, null AS ca_country#199, null AS ca_state#200, null AS county#201, avg(agg1#28)#192 AS agg1#202, avg(agg2#29)#193 AS agg2#203, avg(agg3#30)#194 AS agg3#204, avg(agg4#31)#195 AS agg4#205, avg(agg5#32)#196 AS agg5#206, avg(agg6#33)#197 AS agg6#207, avg(agg7#34)#198 AS agg7#208] + +(129) Scan parquet spark_catalog.default.catalog_sales +Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#9)] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(130) ColumnarToRow [codegen id : 39] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] + +(131) Filter [codegen id : 39] +Input [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] +Condition : ((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_bill_customer_sk#1)) AND isnotnull(cs_item_sk#3)) + +(132) ReusedExchange [Reuses operator id: 8] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] + +(133) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join type: Inner +Join condition: None + +(134) Project [codegen id : 39] +Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13] +Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#10, cd_dep_count#13] + +(135) ReusedExchange [Reuses operator id: 15] +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(136) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_bill_customer_sk#1] +Right keys [1]: [c_customer_sk#14] +Join type: Inner +Join condition: None + +(137) Project [codegen id : 39] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_customer_sk#14, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18] + +(138) ReusedExchange [Reuses operator id: 21] +Output [1]: [cd_demo_sk#19] + +(139) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(140) Project [codegen id : 39] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_cdemo_sk#15, c_current_addr_sk#16, c_birth_year#18, cd_demo_sk#19] + +(141) ReusedExchange [Reuses operator id: 117] +Output [1]: [ca_address_sk#20] + +(142) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#20] +Join type: Inner +Join condition: None + +(143) Project [codegen id : 39] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_current_addr_sk#16, c_birth_year#18, ca_address_sk#20] + +(144) ReusedExchange [Reuses operator id: 34] +Output [1]: [d_date_sk#24] + +(145) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#24] +Join type: Inner +Join condition: None + +(146) Project [codegen id : 39] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#13, c_birth_year#18, d_date_sk#24] + +(147) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(148) ColumnarToRow [codegen id : 38] +Input [1]: [i_item_sk#26] + +(149) Filter [codegen id : 38] +Input [1]: [i_item_sk#26] +Condition : isnotnull(i_item_sk#26) + +(150) BroadcastExchange +Input [1]: [i_item_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(151) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(152) Project [codegen id : 39] +Output [7]: [cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#18 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#34] +Input [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#13, c_birth_year#18, i_item_sk#26] + +(153) HashAggregate [codegen id : 39] +Input [7]: [agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys: [] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216, sum#217, count#218, sum#219, count#220, sum#221, count#222] +Results [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] + +(154) Exchange +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(155) HashAggregate [codegen id : 40] +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] +Keys: [] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#237, avg(agg2#29)#238, avg(agg3#30)#239, avg(agg4#31)#240, avg(agg5#32)#241, avg(agg6#33)#242, avg(agg7#34)#243] +Results [11]: [null AS i_item_id#244, null AS ca_country#245, null AS ca_state#246, null AS county#247, avg(agg1#28)#237 AS agg1#248, avg(agg2#29)#238 AS agg2#249, avg(agg3#30)#239 AS agg3#250, avg(agg4#31)#240 AS agg4#251, avg(agg5#32)#241 AS agg5#252, avg(agg6#33)#242 AS agg6#253, avg(agg7#34)#243 AS agg7#254] + +(156) Union + +(157) TakeOrderedAndProject +Input [11]: [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] +Arguments: 100, [ca_country#23 ASC NULLS FIRST, ca_state#22 ASC NULLS FIRST, ca_county#21 ASC NULLS FIRST, i_item_id#27 ASC NULLS FIRST], [i_item_id#27, ca_country#23, ca_state#22, ca_county#21, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..fa297f1803 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.native_iceberg_compat/simplified.txt @@ -0,0 +1,227 @@ +TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + Union + WholeStageCodegen (8) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state,ca_county] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ca_country,ca_state,ca_county,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk,cd_dep_count] + Filter [cd_gender,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + WholeStageCodegen (16) + HashAggregate [i_item_id,ca_country,ca_state,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country,ca_state] #8 + WholeStageCodegen (15) + HashAggregate [i_item_id,ca_country,ca_state,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ca_country,ca_state,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_state,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_state,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (12) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #7 + WholeStageCodegen (24) + HashAggregate [i_item_id,ca_country,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,ca_country] #10 + WholeStageCodegen (23) + HashAggregate [i_item_id,ca_country,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ca_country,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_country] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year,ca_country] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (20) + Project [ca_address_sk,ca_country] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_country] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #7 + WholeStageCodegen (32) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #12 + WholeStageCodegen (31) + HashAggregate [i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (28) + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #7 + WholeStageCodegen (40) + HashAggregate [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),i_item_id,ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange #14 + WholeStageCodegen (39) + HashAggregate [agg1,agg2,agg3,agg4,agg5,agg6,agg7] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_birth_year] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk,cd_dep_count] #2 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] #3 + InputAdapter + ReusedExchange [cd_demo_sk] #4 + InputAdapter + ReusedExchange [ca_address_sk] #13 + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (38) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/explain.txt new file mode 100644 index 0000000000..8d513788d8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16] + +(20) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/simplified.txt new file mode 100644 index 0000000000..1e7108fcba --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_datafusion/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8d513788d8 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/explain.txt @@ -0,0 +1,140 @@ +== Physical Plan == +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#3)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16] + +(20) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, _we0#17] + +(24) TakeOrderedAndProject +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..1e7108fcba --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.native_iceberg_compat/simplified.txt @@ -0,0 +1,38 @@ +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/explain.txt new file mode 100644 index 0000000000..174908acf7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/explain.txt @@ -0,0 +1,151 @@ +== Physical Plan == +TakeOrderedAndProject (26) ++- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Expand (22) + +- * Project (21) + +- * BroadcastNestedLoopJoin Inner BuildRight (20) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + +- BroadcastExchange (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.warehouse (17) + + +(1) Scan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#3)] +PushedFilters: [IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Condition : isnotnull(inv_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [inv_item_sk#1, inv_quantity_on_hand#2] +Input [4]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Condition : isnotnull(i_item_sk#6) + +(14) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#2, i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(17) Scan parquet spark_catalog.default.warehouse +Output: [] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +ReadSchema: struct<> + +(18) ColumnarToRow [codegen id : 3] +Input: [] + +(19) BroadcastExchange +Input: [] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(20) BroadcastNestedLoopJoin [codegen id : 4] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(22) Expand [codegen id : 4] +Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] + +(23) HashAggregate [codegen id : 4] +Input [6]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [partial_avg(inv_quantity_on_hand#2)] +Aggregate Attributes [2]: [sum#16, count#17] +Results [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] + +(24) Exchange +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [avg(inv_quantity_on_hand#2)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#20] +Results [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, avg(inv_quantity_on_hand#2)#20 AS qoh#21] + +(26) TakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] +Arguments: 100, [qoh#21 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/simplified.txt new file mode 100644 index 0000000000..526ebfb48e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_datafusion/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + BroadcastNestedLoopJoin + Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..174908acf7 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/explain.txt @@ -0,0 +1,151 @@ +== Physical Plan == +TakeOrderedAndProject (26) ++- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Expand (22) + +- * Project (21) + +- * BroadcastNestedLoopJoin Inner BuildRight (20) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + +- BroadcastExchange (19) + +- * ColumnarToRow (18) + +- Scan parquet spark_catalog.default.warehouse (17) + + +(1) Scan parquet spark_catalog.default.inventory +Output [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#3)] +PushedFilters: [IsNotNull(inv_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3] +Condition : isnotnull(inv_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [inv_item_sk#1, inv_quantity_on_hand#2] +Input [4]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Condition : isnotnull(i_item_sk#6) + +(14) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#2, i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(17) Scan parquet spark_catalog.default.warehouse +Output: [] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +ReadSchema: struct<> + +(18) ColumnarToRow [codegen id : 3] +Input: [] + +(19) BroadcastExchange +Input: [] +Arguments: IdentityBroadcastMode, [plan_id=3] + +(20) BroadcastNestedLoopJoin [codegen id : 4] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] + +(22) Expand [codegen id : 4] +Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] + +(23) HashAggregate [codegen id : 4] +Input [6]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [partial_avg(inv_quantity_on_hand#2)] +Aggregate Attributes [2]: [sum#16, count#17] +Results [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] + +(24) Exchange +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Functions [1]: [avg(inv_quantity_on_hand#2)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#20] +Results [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, avg(inv_quantity_on_hand#2)#20 AS qoh#21] + +(26) TakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] +Arguments: 100, [qoh#21 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..526ebfb48e --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.native_iceberg_compat/simplified.txt @@ -0,0 +1,39 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + BroadcastNestedLoopJoin + Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/explain.txt new file mode 100644 index 0000000000..33ed9945bb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/explain.txt @@ -0,0 +1,305 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- Union (48) + :- * HashAggregate (27) + : +- * HashAggregate (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.inventory (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.item (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.warehouse (17) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * HashAggregate (29) + : +- ReusedExchange (28) + :- * HashAggregate (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + :- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * HashAggregate (44) + +- ReusedExchange (43) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(17) Scan parquet spark_catalog.default.warehouse +Output [1]: [w_warehouse_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#12] + +(19) Filter [codegen id : 3] +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [sum#13, count#14] +Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(24) Exchange +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(26) HashAggregate [codegen id : 5] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#18] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#19, count#20] +Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#21, count#22] + +(27) HashAggregate [codegen id : 5] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#21, count#22] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#23] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(qoh#18)#23 AS qoh#24] + +(28) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(29) HashAggregate [codegen id : 10] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [4]: [i_product_name#11, i_brand#8, i_class#9, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(30) HashAggregate [codegen id : 10] +Input [4]: [i_product_name#11, i_brand#8, i_class#9, qoh#18] +Keys [3]: [i_product_name#11, i_brand#8, i_class#9] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#25, count#26] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] + +(31) Exchange +Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 11] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] +Keys [3]: [i_product_name#11, i_brand#8, i_class#9] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#29] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, null AS i_category#30, avg(qoh#18)#29 AS qoh#31] + +(33) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(34) HashAggregate [codegen id : 16] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [3]: [i_product_name#11, i_brand#8, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(35) HashAggregate [codegen id : 16] +Input [3]: [i_product_name#11, i_brand#8, qoh#18] +Keys [2]: [i_product_name#11, i_brand#8] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#32, count#33] +Results [4]: [i_product_name#11, i_brand#8, sum#34, count#35] + +(36) Exchange +Input [4]: [i_product_name#11, i_brand#8, sum#34, count#35] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(37) HashAggregate [codegen id : 17] +Input [4]: [i_product_name#11, i_brand#8, sum#34, count#35] +Keys [2]: [i_product_name#11, i_brand#8] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#36] +Results [5]: [i_product_name#11, i_brand#8, null AS i_class#37, null AS i_category#38, avg(qoh#18)#36 AS qoh#39] + +(38) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(39) HashAggregate [codegen id : 22] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [2]: [i_product_name#11, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(40) HashAggregate [codegen id : 22] +Input [2]: [i_product_name#11, qoh#18] +Keys [1]: [i_product_name#11] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#40, count#41] +Results [3]: [i_product_name#11, sum#42, count#43] + +(41) Exchange +Input [3]: [i_product_name#11, sum#42, count#43] +Arguments: hashpartitioning(i_product_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 23] +Input [3]: [i_product_name#11, sum#42, count#43] +Keys [1]: [i_product_name#11] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#44] +Results [5]: [i_product_name#11, null AS i_brand#45, null AS i_class#46, null AS i_category#47, avg(qoh#18)#44 AS qoh#48] + +(43) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(44) HashAggregate [codegen id : 28] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [1]: [avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(45) HashAggregate [codegen id : 28] +Input [1]: [qoh#18] +Keys: [] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#49, count#50] +Results [2]: [sum#51, count#52] + +(46) Exchange +Input [2]: [sum#51, count#52] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(47) HashAggregate [codegen id : 29] +Input [2]: [sum#51, count#52] +Keys: [] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#53] +Results [5]: [null AS i_product_name#54, null AS i_brand#55, null AS i_class#56, null AS i_category#57, avg(qoh#18)#53 AS qoh#58] + +(48) Union + +(49) TakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#24] +Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#8 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..04c8e759d1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_datafusion/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + Union + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(qoh),qoh,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,inv_quantity_on_hand] [sum,count,sum,count] + Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk] + WholeStageCodegen (11) + HashAggregate [i_product_name,i_brand,i_class,sum,count] [avg(qoh),i_category,qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class] #5 + WholeStageCodegen (10) + HashAggregate [i_product_name,i_brand,i_class,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + WholeStageCodegen (17) + HashAggregate [i_product_name,i_brand,sum,count] [avg(qoh),i_class,i_category,qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand] #6 + WholeStageCodegen (16) + HashAggregate [i_product_name,i_brand,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + WholeStageCodegen (23) + HashAggregate [i_product_name,sum,count] [avg(qoh),i_brand,i_class,i_category,qoh,sum,count] + InputAdapter + Exchange [i_product_name] #7 + WholeStageCodegen (22) + HashAggregate [i_product_name,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + WholeStageCodegen (29) + HashAggregate [sum,count] [avg(qoh),i_product_name,i_brand,i_class,i_category,qoh,sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen (28) + HashAggregate [qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..33ed9945bb --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/explain.txt @@ -0,0 +1,305 @@ +== Physical Plan == +TakeOrderedAndProject (49) ++- Union (48) + :- * HashAggregate (27) + : +- * HashAggregate (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.inventory (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.item (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.warehouse (17) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * HashAggregate (29) + : +- ReusedExchange (28) + :- * HashAggregate (37) + : +- Exchange (36) + : +- * HashAggregate (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + :- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * HashAggregate (39) + : +- ReusedExchange (38) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * HashAggregate (44) + +- ReusedExchange (43) + + +(1) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#4)] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4] +Condition : (isnotnull(inv_item_sk#1) AND isnotnull(inv_warehouse_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3] +Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#1] +Right keys [1]: [i_item_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] + +(17) Scan parquet spark_catalog.default.warehouse +Output [1]: [w_warehouse_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#12] + +(19) Filter [codegen id : 3] +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(inv_quantity_on_hand#3)] +Aggregate Attributes [2]: [sum#13, count#14] +Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(24) Exchange +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(26) HashAggregate [codegen id : 5] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#18] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#19, count#20] +Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#21, count#22] + +(27) HashAggregate [codegen id : 5] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#21, count#22] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#23] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(qoh#18)#23 AS qoh#24] + +(28) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(29) HashAggregate [codegen id : 10] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [4]: [i_product_name#11, i_brand#8, i_class#9, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(30) HashAggregate [codegen id : 10] +Input [4]: [i_product_name#11, i_brand#8, i_class#9, qoh#18] +Keys [3]: [i_product_name#11, i_brand#8, i_class#9] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#25, count#26] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] + +(31) Exchange +Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 11] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] +Keys [3]: [i_product_name#11, i_brand#8, i_class#9] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#29] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, null AS i_category#30, avg(qoh#18)#29 AS qoh#31] + +(33) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(34) HashAggregate [codegen id : 16] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [3]: [i_product_name#11, i_brand#8, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(35) HashAggregate [codegen id : 16] +Input [3]: [i_product_name#11, i_brand#8, qoh#18] +Keys [2]: [i_product_name#11, i_brand#8] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#32, count#33] +Results [4]: [i_product_name#11, i_brand#8, sum#34, count#35] + +(36) Exchange +Input [4]: [i_product_name#11, i_brand#8, sum#34, count#35] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(37) HashAggregate [codegen id : 17] +Input [4]: [i_product_name#11, i_brand#8, sum#34, count#35] +Keys [2]: [i_product_name#11, i_brand#8] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#36] +Results [5]: [i_product_name#11, i_brand#8, null AS i_class#37, null AS i_category#38, avg(qoh#18)#36 AS qoh#39] + +(38) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(39) HashAggregate [codegen id : 22] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [2]: [i_product_name#11, avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(40) HashAggregate [codegen id : 22] +Input [2]: [i_product_name#11, qoh#18] +Keys [1]: [i_product_name#11] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#40, count#41] +Results [3]: [i_product_name#11, sum#42, count#43] + +(41) Exchange +Input [3]: [i_product_name#11, sum#42, count#43] +Arguments: hashpartitioning(i_product_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 23] +Input [3]: [i_product_name#11, sum#42, count#43] +Keys [1]: [i_product_name#11] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#44] +Results [5]: [i_product_name#11, null AS i_brand#45, null AS i_class#46, null AS i_category#47, avg(qoh#18)#44 AS qoh#48] + +(43) ReusedExchange [Reuses operator id: 24] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] + +(44) HashAggregate [codegen id : 28] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] +Functions [1]: [avg(inv_quantity_on_hand#3)] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [1]: [avg(inv_quantity_on_hand#3)#17 AS qoh#18] + +(45) HashAggregate [codegen id : 28] +Input [1]: [qoh#18] +Keys: [] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#49, count#50] +Results [2]: [sum#51, count#52] + +(46) Exchange +Input [2]: [sum#51, count#52] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(47) HashAggregate [codegen id : 29] +Input [2]: [sum#51, count#52] +Keys: [] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#53] +Results [5]: [null AS i_product_name#54, null AS i_brand#55, null AS i_class#56, null AS i_category#57, avg(qoh#18)#53 AS qoh#58] + +(48) Union + +(49) TakeOrderedAndProject +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#24] +Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#8 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#24] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..04c8e759d1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.native_iceberg_compat/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + Union + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(qoh),qoh,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class,i_category] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,inv_quantity_on_hand] [sum,count,sum,count] + Project [inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk] + WholeStageCodegen (11) + HashAggregate [i_product_name,i_brand,i_class,sum,count] [avg(qoh),i_category,qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand,i_class] #5 + WholeStageCodegen (10) + HashAggregate [i_product_name,i_brand,i_class,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + WholeStageCodegen (17) + HashAggregate [i_product_name,i_brand,sum,count] [avg(qoh),i_class,i_category,qoh,sum,count] + InputAdapter + Exchange [i_product_name,i_brand] #6 + WholeStageCodegen (16) + HashAggregate [i_product_name,i_brand,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + WholeStageCodegen (23) + HashAggregate [i_product_name,sum,count] [avg(qoh),i_brand,i_class,i_category,qoh,sum,count] + InputAdapter + Exchange [i_product_name] #7 + WholeStageCodegen (22) + HashAggregate [i_product_name,qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 + WholeStageCodegen (29) + HashAggregate [sum,count] [avg(qoh),i_product_name,i_brand,i_class,i_category,qoh,sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen (28) + HashAggregate [qoh] [sum,count,sum,count] + HashAggregate [i_product_name,i_brand,i_class,i_category,sum,count] [avg(inv_quantity_on_hand),qoh,sum,count] + InputAdapter + ReusedExchange [i_product_name,i_brand,i_class,i_category,sum,count] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/explain.txt new file mode 100644 index 0000000000..a0e507ac1c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +* Sort (48) ++- Exchange (47) + +- * Filter (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Sort (12) + : : : : +- Exchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.store_returns (7) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store (15) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.item (22) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.customer (28) + +- BroadcastExchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.customer_address (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(5) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(9) Filter [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(10) Project [codegen id : 3] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(11) Exchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(14) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(15) Scan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(17) Filter [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(18) Project [codegen id : 5] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(19) BroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(22) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(25) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(28) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(30) Filter [codegen id : 7] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : ((isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) AND isnotnull(c_birth_country#25)) + +(31) BroadcastExchange +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(34) Scan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(36) Filter [codegen id : 8] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Condition : ((isnotnull(ca_address_sk#26) AND isnotnull(ca_country#29)) AND isnotnull(ca_zip#28)) + +(37) BroadcastExchange +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(40) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#30] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] + +(41) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] + +(43) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, netpaid#33] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [partial_sum(netpaid#33)] +Aggregate Attributes [2]: [sum#34, isEmpty#35] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] + +(44) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [sum(netpaid#33)] +Aggregate Attributes [1]: [sum(netpaid#33)#38] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, sum(netpaid#33)#38 AS paid#39] + +(46) Filter [codegen id : 11] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Condition : (isnotnull(paid#39) AND (cast(paid#39 as decimal(33,8)) > cast(Subquery scalar-subquery#40, [id=#41] as decimal(33,8)))) + +(47) Exchange +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(48) Sort [codegen id : 12] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], true, 0 + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#40, [id=#41] +* HashAggregate (75) ++- Exchange (74) + +- * HashAggregate (73) + +- * HashAggregate (72) + +- Exchange (71) + +- * HashAggregate (70) + +- * Project (69) + +- * BroadcastHashJoin Inner BuildRight (68) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : :- * Project (54) + : : : : +- * SortMergeJoin Inner (53) + : : : : :- * Sort (50) + : : : : : +- ReusedExchange (49) + : : : : +- * Sort (52) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (55) + : : +- BroadcastExchange (61) + : : +- * Filter (60) + : : +- * ColumnarToRow (59) + : : +- Scan parquet spark_catalog.default.item (58) + : +- ReusedExchange (64) + +- ReusedExchange (67) + + +(49) ReusedExchange [Reuses operator id: 5] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(50) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(51) ReusedExchange [Reuses operator id: 11] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] + +(52) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(55) ReusedExchange [Reuses operator id: 19] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(56) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(58) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(60) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(61) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(62) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(64) ReusedExchange [Reuses operator id: 31] +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(65) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 9] +Output [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(67) ReusedExchange [Reuses operator id: 37] +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(68) BroadcastHashJoin [codegen id : 9] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(70) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#42] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] + +(71) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(72) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] + +(73) HashAggregate [codegen id : 10] +Input [1]: [netpaid#33] +Keys: [] +Functions [1]: [partial_avg(netpaid#33)] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] + +(74) Exchange +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(75) HashAggregate [codegen id : 11] +Input [2]: [sum#46, count#47] +Keys: [] +Functions [1]: [avg(netpaid#33)] +Aggregate Attributes [1]: [avg(netpaid#33)#48] +Results [1]: [(0.05 * avg(netpaid#33)#48) AS (0.05 * avg(netpaid))#49] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/simplified.txt new file mode 100644 index 0000000000..104ec43154 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_datafusion/simplified.txt @@ -0,0 +1,122 @@ +WholeStageCodegen (12) + Sort [c_last_name,c_first_name,s_store_name] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (11) + Filter [paid] + Subquery #1 + WholeStageCodegen (11) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #10 + WholeStageCodegen (10) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #11 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #4 + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #5 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + ReusedExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] #8 + InputAdapter + ReusedExchange [ca_address_sk,ca_state,ca_zip,ca_country] #9 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #2 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #3 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #4 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #5 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [c_customer_sk,c_current_addr_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..a0e507ac1c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/explain.txt @@ -0,0 +1,437 @@ +== Physical Plan == +* Sort (48) ++- Exchange (47) + +- * Filter (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (21) + : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : :- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (6) + : : : : : +- Exchange (5) + : : : : : +- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Sort (12) + : : : : +- Exchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet spark_catalog.default.store_returns (7) + : : : +- BroadcastExchange (19) + : : : +- * Project (18) + : : : +- * Filter (17) + : : : +- * ColumnarToRow (16) + : : : +- Scan parquet spark_catalog.default.store (15) + : : +- BroadcastExchange (25) + : : +- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet spark_catalog.default.item (22) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet spark_catalog.default.customer (28) + +- BroadcastExchange (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet spark_catalog.default.customer_address (34) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] + +(5) Exchange +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(6) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(7) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(9) Filter [codegen id : 3] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) + +(10) Project [codegen id : 3] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] + +(11) Exchange +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(13) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(14) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(15) Scan parquet spark_catalog.default.store +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(17) Filter [codegen id : 5] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) + +(18) Project [codegen id : 5] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] + +(19) BroadcastExchange +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(22) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(24) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) + +(25) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(27) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(28) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(30) Filter [codegen id : 7] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : ((isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) AND isnotnull(c_birth_country#25)) + +(31) BroadcastExchange +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(34) Scan parquet spark_catalog.default.customer_address +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(36) Filter [codegen id : 8] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Condition : ((isnotnull(ca_address_sk#26) AND isnotnull(ca_country#29)) AND isnotnull(ca_zip#28)) + +(37) BroadcastExchange +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [plan_id=6] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(40) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#30] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] + +(41) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(42) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] + +(43) HashAggregate [codegen id : 10] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, netpaid#33] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [partial_sum(netpaid#33)] +Aggregate Attributes [2]: [sum#34, isEmpty#35] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] + +(44) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 11] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [sum(netpaid#33)] +Aggregate Attributes [1]: [sum(netpaid#33)#38] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, sum(netpaid#33)#38 AS paid#39] + +(46) Filter [codegen id : 11] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Condition : (isnotnull(paid#39) AND (cast(paid#39 as decimal(33,8)) > cast(Subquery scalar-subquery#40, [id=#41] as decimal(33,8)))) + +(47) Exchange +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(48) Sort [codegen id : 12] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], true, 0 + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#40, [id=#41] +* HashAggregate (75) ++- Exchange (74) + +- * HashAggregate (73) + +- * HashAggregate (72) + +- Exchange (71) + +- * HashAggregate (70) + +- * Project (69) + +- * BroadcastHashJoin Inner BuildRight (68) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : :- * Project (54) + : : : : +- * SortMergeJoin Inner (53) + : : : : :- * Sort (50) + : : : : : +- ReusedExchange (49) + : : : : +- * Sort (52) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (55) + : : +- BroadcastExchange (61) + : : +- * Filter (60) + : : +- * ColumnarToRow (59) + : : +- Scan parquet spark_catalog.default.item (58) + : +- ReusedExchange (64) + +- ReusedExchange (67) + + +(49) ReusedExchange [Reuses operator id: 5] +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(50) Sort [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(51) ReusedExchange [Reuses operator id: 11] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] + +(52) Sort [codegen id : 4] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 9] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] + +(55) ReusedExchange [Reuses operator id: 19] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(56) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#10] +Join type: Inner +Join condition: None + +(57) Project [codegen id : 9] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] + +(58) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(60) Filter [codegen id : 6] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(61) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(62) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 9] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(64) ReusedExchange [Reuses operator id: 31] +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(65) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#21] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 9] +Output [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(67) ReusedExchange [Reuses operator id: 37] +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(68) BroadcastHashJoin [codegen id : 9] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 9] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] + +(70) HashAggregate [codegen id : 9] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#42] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] + +(71) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(72) HashAggregate [codegen id : 10] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] + +(73) HashAggregate [codegen id : 10] +Input [1]: [netpaid#33] +Keys: [] +Functions [1]: [partial_avg(netpaid#33)] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] + +(74) Exchange +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(75) HashAggregate [codegen id : 11] +Input [2]: [sum#46, count#47] +Keys: [] +Functions [1]: [avg(netpaid#33)] +Aggregate Attributes [1]: [avg(netpaid#33)#48] +Results [1]: [(0.05 * avg(netpaid#33)#48) AS (0.05 * avg(netpaid))#49] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..104ec43154 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.native_iceberg_compat/simplified.txt @@ -0,0 +1,122 @@ +WholeStageCodegen (12) + Sort [c_last_name,c_first_name,s_store_name] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (11) + Filter [paid] + Subquery #1 + WholeStageCodegen (11) + HashAggregate [sum,count] [avg(netpaid),(0.05 * avg(netpaid)),sum,count] + InputAdapter + Exchange #10 + WholeStageCodegen (10) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #11 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] #4 + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] #5 + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_state,s_zip] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + ReusedExchange [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] #8 + InputAdapter + ReusedExchange [ca_address_sk,ca_state,ca_zip,ca_country] #9 + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum,isEmpty] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #2 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #3 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_current_addr_sk,c_birth_country,s_zip,ca_address_sk,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #4 + WholeStageCodegen (1) + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #5 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Filter [c_customer_sk,c_current_addr_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ca_address_sk,ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state,ca_zip,ca_country] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/explain.txt new file mode 100644 index 0000000000..25c5db89ce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/explain.txt @@ -0,0 +1,443 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- Union (76) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.date_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.store (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet spark_catalog.default.item (24) + :- * HashAggregate (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * Project (41) + : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : :- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet spark_catalog.default.store_sales (33) + : : : : +- ReusedExchange (36) + : : : +- ReusedExchange (39) + : : +- BroadcastExchange (46) + : : +- * Project (45) + : : +- * Filter (44) + : : +- * ColumnarToRow (43) + : : +- Scan parquet spark_catalog.default.store (42) + : +- ReusedExchange (49) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow (56) + : : : : +- Scan parquet spark_catalog.default.store_sales (55) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet spark_catalog.default.item (67) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] + +(24) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(27) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] + +(30) HashAggregate [codegen id : 5] +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Results [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] + +(31) Exchange +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#39, avg(UnscaledValue(agg2#20))#40, avg(UnscaledValue(agg3#21))#41, avg(UnscaledValue(agg4#22))#42] +Results [7]: [i_item_id#18, s_state#16, 0 AS g_state#43, avg(agg1#19)#39 AS agg1#44, cast((avg(UnscaledValue(agg2#20))#40 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(agg3#21))#41 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(agg4#22))#42 / 100.0) as decimal(11,6)) AS agg4#47] + +(33) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#13] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(42) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#15, s_state#16] + +(44) Filter [codegen id : 9] +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(45) Project [codegen id : 9] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_state#16] + +(46) BroadcastExchange +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 11] +Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] + +(49) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#17, i_item_id#18] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 11] +Output [5]: [i_item_id#18, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17, i_item_id#18] + +(52) HashAggregate [codegen id : 11] +Input [5]: [i_item_id#18, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] +Results [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] + +(53) Exchange +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(54) HashAggregate [codegen id : 12] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#64, avg(UnscaledValue(agg2#20))#65, avg(UnscaledValue(agg3#21))#66, avg(UnscaledValue(agg4#22))#67] +Results [7]: [i_item_id#18, null AS s_state#68, 1 AS g_state#69, avg(agg1#19)#64 AS agg1#70, cast((avg(UnscaledValue(agg2#20))#65 / 100.0) as decimal(11,6)) AS agg2#71, cast((avg(UnscaledValue(agg3#21))#66 / 100.0) as decimal(11,6)) AS agg3#72, cast((avg(UnscaledValue(agg4#22))#67 / 100.0) as decimal(11,6)) AS agg4#73] + +(55) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(60) Project [codegen id : 17] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#13] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(64) ReusedExchange [Reuses operator id: 46] +Output [1]: [s_store_sk#15] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] + +(67) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 16] +Input [1]: [i_item_sk#17] + +(69) Filter [codegen id : 16] +Input [1]: [i_item_sk#17] +Condition : isnotnull(i_item_sk#17) + +(70) BroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(71) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(72) Project [codegen id : 17] +Output [4]: [ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17] + +(73) HashAggregate [codegen id : 17] +Input [4]: [agg1#19, agg2#20, agg3#21, agg4#22] +Keys: [] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#74, count#75, sum#76, count#77, sum#78, count#79, sum#80, count#81] +Results [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] + +(74) Exchange +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] + +(75) HashAggregate [codegen id : 18] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Keys: [] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#90, avg(UnscaledValue(agg2#20))#91, avg(UnscaledValue(agg3#21))#92, avg(UnscaledValue(agg4#22))#93] +Results [7]: [null AS i_item_id#94, null AS s_state#95, 1 AS g_state#96, avg(agg1#19)#90 AS agg1#97, cast((avg(UnscaledValue(agg2#20))#91 / 100.0) as decimal(11,6)) AS agg2#98, cast((avg(UnscaledValue(agg3#21))#92 / 100.0) as decimal(11,6)) AS agg3#99, cast((avg(UnscaledValue(agg4#22))#93 / 100.0) as decimal(11,6)) AS agg4#100] + +(76) Union + +(77) TakeOrderedAndProject +Input [7]: [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..fd5fca8f6a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_datafusion/simplified.txt @@ -0,0 +1,113 @@ +TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,s_state,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,s_state] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,s_state,agg1,agg2,agg3,agg4] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,s_state,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,agg1,agg2,agg3,agg4] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen (17) + HashAggregate [agg1,agg2,agg3,agg4] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #7 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..25c5db89ce --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/explain.txt @@ -0,0 +1,443 @@ +== Physical Plan == +TakeOrderedAndProject (77) ++- Union (76) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.customer_demographics (4) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.date_dim (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet spark_catalog.default.store (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet spark_catalog.default.item (24) + :- * HashAggregate (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * Project (41) + : : : +- * BroadcastHashJoin Inner BuildRight (40) + : : : :- * Project (38) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet spark_catalog.default.store_sales (33) + : : : : +- ReusedExchange (36) + : : : +- ReusedExchange (39) + : : +- BroadcastExchange (46) + : : +- * Project (45) + : : +- * Filter (44) + : : +- * ColumnarToRow (43) + : : +- Scan parquet spark_catalog.default.store (42) + : +- ReusedExchange (49) + +- * HashAggregate (75) + +- Exchange (74) + +- * HashAggregate (73) + +- * Project (72) + +- * BroadcastHashJoin Inner BuildRight (71) + :- * Project (66) + : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow (56) + : : : : +- Scan parquet spark_catalog.default.store_sales (55) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- ReusedExchange (64) + +- BroadcastExchange (70) + +- * Filter (69) + +- * ColumnarToRow (68) + +- Scan parquet spark_catalog.default.item (67) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary ), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = F)) AND (cd_marital_status#11 = W)) AND (cd_education_status#12 = Primary )) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(18) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] + +(24) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(27) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] + +(30) HashAggregate [codegen id : 5] +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Results [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] + +(31) Exchange +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(32) HashAggregate [codegen id : 6] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#39, avg(UnscaledValue(agg2#20))#40, avg(UnscaledValue(agg3#21))#41, avg(UnscaledValue(agg4#22))#42] +Results [7]: [i_item_id#18, s_state#16, 0 AS g_state#43, avg(agg1#19)#39 AS agg1#44, cast((avg(UnscaledValue(agg2#20))#40 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(agg3#21))#41 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(agg4#22))#42 / 100.0) as decimal(11,6)) AS agg4#47] + +(33) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 11] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#13] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 11] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(42) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_state#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#15, s_state#16] + +(44) Filter [codegen id : 9] +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) + +(45) Project [codegen id : 9] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_state#16] + +(46) BroadcastExchange +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 11] +Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] + +(49) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#17, i_item_id#18] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 11] +Output [5]: [i_item_id#18, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17, i_item_id#18] + +(52) HashAggregate [codegen id : 11] +Input [5]: [i_item_id#18, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] +Results [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] + +(53) Exchange +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(54) HashAggregate [codegen id : 12] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#64, avg(UnscaledValue(agg2#20))#65, avg(UnscaledValue(agg3#21))#66, avg(UnscaledValue(agg4#22))#67] +Results [7]: [i_item_id#18, null AS s_state#68, 1 AS g_state#69, avg(agg1#19)#64 AS agg1#70, cast((avg(UnscaledValue(agg2#20))#65 / 100.0) as decimal(11,6)) AS agg2#71, cast((avg(UnscaledValue(agg3#21))#66 / 100.0) as decimal(11,6)) AS agg3#72, cast((avg(UnscaledValue(agg4#22))#67 / 100.0) as decimal(11,6)) AS agg4#73] + +(55) Scan parquet spark_catalog.default.store_sales +Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#8)] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_cdemo_sk#2) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#1)) + +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [cd_demo_sk#9] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join type: Inner +Join condition: None + +(60) Project [codegen id : 17] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] +Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#9] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#13] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#13] + +(64) ReusedExchange [Reuses operator id: 46] +Output [1]: [s_store_sk#15] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] + +(67) Scan parquet spark_catalog.default.item +Output [1]: [i_item_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 16] +Input [1]: [i_item_sk#17] + +(69) Filter [codegen id : 16] +Input [1]: [i_item_sk#17] +Condition : isnotnull(i_item_sk#17) + +(70) BroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(71) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(72) Project [codegen id : 17] +Output [4]: [ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17] + +(73) HashAggregate [codegen id : 17] +Input [4]: [agg1#19, agg2#20, agg3#21, agg4#22] +Keys: [] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#74, count#75, sum#76, count#77, sum#78, count#79, sum#80, count#81] +Results [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] + +(74) Exchange +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] + +(75) HashAggregate [codegen id : 18] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Keys: [] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#90, avg(UnscaledValue(agg2#20))#91, avg(UnscaledValue(agg3#21))#92, avg(UnscaledValue(agg4#22))#93] +Results [7]: [null AS i_item_id#94, null AS s_state#95, 1 AS g_state#96, avg(agg1#19)#90 AS agg1#97, cast((avg(UnscaledValue(agg2#20))#91 / 100.0) as decimal(11,6)) AS agg2#98, cast((avg(UnscaledValue(agg3#21))#92 / 100.0) as decimal(11,6)) AS agg3#99, cast((avg(UnscaledValue(agg4#22))#93 / 100.0) as decimal(11,6)) AS agg4#100] + +(76) Union + +(77) TakeOrderedAndProject +Input [7]: [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..fd5fca8f6a --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.native_iceberg_compat/simplified.txt @@ -0,0 +1,113 @@ +TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + Union + WholeStageCodegen (6) + HashAggregate [i_item_id,s_state,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id,s_state] #1 + WholeStageCodegen (5) + HashAggregate [i_item_id,s_state,agg1,agg2,agg3,agg4] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,s_state,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen (11) + HashAggregate [i_item_id,agg1,agg2,agg3,agg4] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(UnscaledValue(agg2)),avg(UnscaledValue(agg3)),avg(UnscaledValue(agg4)),i_item_id,s_state,g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen (17) + HashAggregate [agg1,agg2,agg3,agg4] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,ss_sold_date_sk] + InputAdapter + ReusedExchange [cd_demo_sk] #2 + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + ReusedExchange [s_store_sk] #7 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (16) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/explain.txt new file mode 100644 index 0000000000..d0eba50a09 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.customer (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_county#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#1, ss_ticket_number#4] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] + +(26) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#17 AS cnt#18] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18] +Condition : ((cnt#18 >= 15) AND (cnt#18 <= 20)) + +(29) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Condition : isnotnull(c_customer_sk#19) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18, c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(35) Exchange +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: rangepartitioning(c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: [c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/simplified.txt new file mode 100644 index 0000000000..67af698912 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_datafusion/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number] + InputAdapter + Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [cnt] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d0eba50a09 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/explain.txt @@ -0,0 +1,208 @@ +== Physical Plan == +* Sort (36) ++- Exchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Filter (28) + : +- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet spark_catalog.default.household_demographics (18) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet spark_catalog.default.customer (29) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] +Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] +Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#9, s_county#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#9, s_county#10] +Condition : ((isnotnull(s_county#10) AND (s_county#10 = Williamson County)) AND isnotnull(s_store_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#9] +Input [2]: [s_store_sk#9, s_county#10] + +(15) BroadcastExchange +Input [1]: [s_store_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join type: Inner +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#9] + +(18) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Condition : ((((isnotnull(hd_vehicle_count#14) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND CASE WHEN (hd_vehicle_count#14 > 0) THEN ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#11] +Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#11] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#1, ss_ticket_number#4] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#1, ss_ticket_number#4] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] + +(26) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#16] +Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#17 AS cnt#18] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18] +Condition : ((cnt#18 >= 15) AND (cnt#18 <= 20)) + +(29) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Condition : isnotnull(c_customer_sk#19) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#1] +Right keys [1]: [c_customer_sk#19] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#18, c_customer_sk#19, c_salutation#20, c_first_name#21, c_last_name#22, c_preferred_cust_flag#23] + +(35) Exchange +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: rangepartitioning(c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#22, c_first_name#21, c_salutation#20, c_preferred_cust_flag#23, ss_ticket_number#4, cnt#18] +Arguments: [c_last_name#22 ASC NULLS FIRST, c_first_name#21 ASC NULLS FIRST, c_salutation#20 ASC NULLS FIRST, c_preferred_cust_flag#23 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..67af698912 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.native_iceberg_compat/simplified.txt @@ -0,0 +1,54 @@ +WholeStageCodegen (7) + Sort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number] + InputAdapter + Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [cnt] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] + Project [ss_customer_sk,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/explain.txt new file mode 100644 index 0000000000..23a0bff984 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/explain.txt @@ -0,0 +1,267 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (30) + : : +- * Filter (29) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (28) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.customer_address (31) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.customer_demographics (37) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(10) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#13] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#11] +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#16] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#14] +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(29) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(30) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(33) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(34) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#18] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17, ca_state#18] + +(37) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(39) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(40) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 9] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#4, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(43) HashAggregate [codegen id : 9] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] + +(44) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 10] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, avg(cd_dep_count#22)#52, max(cd_dep_count#22)#53, sum(cd_dep_count#22)#54, avg(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, sum(cd_dep_employed_count#23)#57, avg(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, sum(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, count(1)#51 AS cnt1#61, avg(cd_dep_count#22)#52 AS avg(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, sum(cd_dep_count#22)#54 AS sum(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, avg(cd_dep_employed_count#23)#55 AS avg(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, sum(cd_dep_employed_count#23)#57 AS sum(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, avg(cd_dep_college_count#24)#58 AS avg(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, sum(cd_dep_college_count#24)#60 AS sum(cd_dep_college_count)#72] + +(46) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/simplified.txt new file mode 100644 index 0000000000..46a3b243ff --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_datafusion/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] [count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..23a0bff984 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/explain.txt @@ -0,0 +1,267 @@ +== Physical Plan == +TakeOrderedAndProject (46) ++- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (30) + : : +- * Filter (29) + : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (28) + : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (21) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * ColumnarToRow (5) + : : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Project (9) + : : : : +- * Filter (8) + : : : : +- * ColumnarToRow (7) + : : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : : +- BroadcastExchange (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * ColumnarToRow (23) + : : : +- Scan parquet spark_catalog.default.catalog_sales (22) + : : +- ReusedExchange (24) + : +- BroadcastExchange (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet spark_catalog.default.customer_address (31) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet spark_catalog.default.customer_demographics (37) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(10) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#8] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#12)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#13] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#11] +Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] + +(20) BroadcastExchange +Input [1]: [ws_bill_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#11] +Join type: ExistenceJoin(exists#2) +Join condition: None + +(22) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#15)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] + +(24) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#16] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#14] +Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] + +(27) BroadcastExchange +Input [1]: [cs_ship_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#14] +Join type: ExistenceJoin(exists#1) +Join condition: None + +(29) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(30) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(31) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(33) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(34) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#18] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#17, ca_state#18] + +(37) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(39) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(40) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 9] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#4, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(43) HashAggregate [codegen id : 9] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] + +(44) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 10] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, avg(cd_dep_count#22)#52, max(cd_dep_count#22)#53, sum(cd_dep_count#22)#54, avg(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, sum(cd_dep_employed_count#23)#57, avg(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, sum(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, count(1)#51 AS cnt1#61, avg(cd_dep_count#22)#52 AS avg(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, sum(cd_dep_count#22)#54 AS sum(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, avg(cd_dep_employed_count#23)#55 AS avg(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, sum(cd_dep_employed_count#23)#57 AS sum(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, avg(cd_dep_college_count#24)#58 AS avg(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, sum(cd_dep_college_count#24)#60 AS sum(cd_dep_college_count)#72] + +(46) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..46a3b243ff --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.native_iceberg_compat/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] [count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + Filter [exists,exists] + BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/explain.txt new file mode 100644 index 0000000000..33b8893391 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/explain.txt @@ -0,0 +1,253 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (34) + : +- * BroadcastHashJoin Inner BuildRight (33) + : :- * Project (28) + : : +- * BroadcastHashJoin LeftSemi BuildRight (27) + : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : +- BroadcastExchange (26) + : : +- Union (25) + : : :- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (22) + : +- BroadcastExchange (32) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (38) + +- * Filter (37) + +- * ColumnarToRow (36) + +- Scan parquet spark_catalog.default.customer_demographics (35) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Condition : ((((isnotnull(d_year#7) AND isnotnull(d_qoy#8)) AND (d_year#7 = 1999)) AND (d_qoy#8 < 4)) AND isnotnull(d_date_sk#6)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(10) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#4] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#11] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#9 AS customsk#12] +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#14)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] + +(22) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#15] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#13 AS customsk#16] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] + +(25) Union + +(26) BroadcastExchange +Input [1]: [customsk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(27) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customsk#12] +Join type: LeftSemi +Join condition: None + +(28) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(29) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(31) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(32) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(33) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, ca_state#18] +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17, ca_state#18] + +(35) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(37) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(38) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#2, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(41) HashAggregate [codegen id : 9] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] + +(42) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(43) HashAggregate [codegen id : 10] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, avg(cd_dep_count#22)#52, max(cd_dep_count#22)#53, sum(cd_dep_count#22)#54, avg(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, sum(cd_dep_employed_count#23)#57, avg(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, sum(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, count(1)#51 AS cnt1#61, avg(cd_dep_count#22)#52 AS avg(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, sum(cd_dep_count#22)#54 AS sum(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, avg(cd_dep_employed_count#23)#55 AS avg(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, sum(cd_dep_employed_count#23)#57 AS sum(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, avg(cd_dep_college_count#24)#58 AS avg(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, sum(cd_dep_college_count#24)#60 AS sum(cd_dep_college_count)#72] + +(44) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..11b6f555e2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_datafusion/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] [count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,customsk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..33b8893391 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/explain.txt @@ -0,0 +1,253 @@ +== Physical Plan == +TakeOrderedAndProject (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (34) + : +- * BroadcastHashJoin Inner BuildRight (33) + : :- * Project (28) + : : +- * BroadcastHashJoin LeftSemi BuildRight (27) + : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : +- BroadcastExchange (13) + : : : +- * Project (12) + : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : :- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.date_dim (6) + : : +- BroadcastExchange (26) + : : +- Union (25) + : : :- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * ColumnarToRow (16) + : : : : +- Scan parquet spark_catalog.default.web_sales (15) + : : : +- ReusedExchange (17) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * ColumnarToRow (21) + : : : +- Scan parquet spark_catalog.default.catalog_sales (20) + : : +- ReusedExchange (22) + : +- BroadcastExchange (32) + : +- * Filter (31) + : +- * ColumnarToRow (30) + : +- Scan parquet spark_catalog.default.customer_address (29) + +- BroadcastExchange (38) + +- * Filter (37) + +- * ColumnarToRow (36) + +- Scan parquet spark_catalog.default.customer_demographics (35) + + +(1) Scan parquet spark_catalog.default.customer +Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] + +(6) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(8) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] +Condition : ((((isnotnull(d_year#7) AND isnotnull(d_qoy#8)) AND (d_year#7 = 1999)) AND (d_qoy#8 < 4)) AND isnotnull(d_date_sk#6)) + +(9) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_qoy#8] + +(10) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(12) Project [codegen id : 2] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#6] + +(13) BroadcastExchange +Input [1]: [ss_customer_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#4] +Join type: LeftSemi +Join condition: None + +(15) Scan parquet spark_catalog.default.web_sales +Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#10)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 4] +Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] + +(17) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#11] + +(18) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(19) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#9 AS customsk#12] +Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] + +(20) Scan parquet spark_catalog.default.catalog_sales +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#14)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] + +(22) ReusedExchange [Reuses operator id: 10] +Output [1]: [d_date_sk#15] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] +Join type: Inner +Join condition: None + +(24) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#13 AS customsk#16] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] + +(25) Union + +(26) BroadcastExchange +Input [1]: [customsk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(27) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [customsk#12] +Join type: LeftSemi +Join condition: None + +(28) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(29) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(31) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) + +(32) BroadcastExchange +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(33) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, ca_state#18] +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17, ca_state#18] + +(35) Scan parquet spark_catalog.default.customer_demographics +Output [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(37) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Condition : isnotnull(cd_demo_sk#19) + +(38) BroadcastExchange +Input [6]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Input [8]: [c_current_cdemo_sk#2, ca_state#18, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] + +(41) HashAggregate [codegen id : 9] +Input [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#22), partial_max(cd_dep_count#22), partial_sum(cd_dep_count#22), partial_avg(cd_dep_employed_count#23), partial_max(cd_dep_employed_count#23), partial_sum(cd_dep_employed_count#23), partial_avg(cd_dep_college_count#24), partial_max(cd_dep_college_count#24), partial_sum(cd_dep_college_count#24)] +Aggregate Attributes [13]: [count#25, sum#26, count#27, max#28, sum#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37] +Results [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] + +(42) Exchange +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Arguments: hashpartitioning(ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(43) HashAggregate [codegen id : 10] +Input [19]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24, count#38, sum#39, count#40, max#41, sum#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50] +Keys [6]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cd_dep_employed_count#23, cd_dep_college_count#24] +Functions [10]: [count(1), avg(cd_dep_count#22), max(cd_dep_count#22), sum(cd_dep_count#22), avg(cd_dep_employed_count#23), max(cd_dep_employed_count#23), sum(cd_dep_employed_count#23), avg(cd_dep_college_count#24), max(cd_dep_college_count#24), sum(cd_dep_college_count#24)] +Aggregate Attributes [10]: [count(1)#51, avg(cd_dep_count#22)#52, max(cd_dep_count#22)#53, sum(cd_dep_count#22)#54, avg(cd_dep_employed_count#23)#55, max(cd_dep_employed_count#23)#56, sum(cd_dep_employed_count#23)#57, avg(cd_dep_college_count#24)#58, max(cd_dep_college_count#24)#59, sum(cd_dep_college_count#24)#60] +Results [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, count(1)#51 AS cnt1#61, avg(cd_dep_count#22)#52 AS avg(cd_dep_count)#62, max(cd_dep_count#22)#53 AS max(cd_dep_count)#63, sum(cd_dep_count#22)#54 AS sum(cd_dep_count)#64, cd_dep_employed_count#23, count(1)#51 AS cnt2#65, avg(cd_dep_employed_count#23)#55 AS avg(cd_dep_employed_count)#66, max(cd_dep_employed_count#23)#56 AS max(cd_dep_employed_count)#67, sum(cd_dep_employed_count#23)#57 AS sum(cd_dep_employed_count)#68, cd_dep_college_count#24, count(1)#51 AS cnt3#69, avg(cd_dep_college_count#24)#58 AS avg(cd_dep_college_count)#70, max(cd_dep_college_count#24)#59 AS max(cd_dep_college_count)#71, sum(cd_dep_college_count#24)#60 AS sum(cd_dep_college_count)#72] + +(44) TakeOrderedAndProject +Input [18]: [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] +Arguments: 100, [ca_state#18 ASC NULLS FIRST, cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_dep_count#22 ASC NULLS FIRST, cd_dep_employed_count#23 ASC NULLS FIRST, cd_dep_college_count#24 ASC NULLS FIRST], [ca_state#18, cd_gender#20, cd_marital_status#21, cd_dep_count#22, cnt1#61, avg(cd_dep_count)#62, max(cd_dep_count)#63, sum(cd_dep_count)#64, cd_dep_employed_count#23, cnt2#65, avg(cd_dep_employed_count)#66, max(cd_dep_employed_count)#67, sum(cd_dep_employed_count)#68, cd_dep_college_count#24, cnt3#69, avg(cd_dep_college_count)#70, max(cd_dep_college_count)#71, sum(cd_dep_college_count)#72] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..11b6f555e2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.native_iceberg_compat/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] [count(1),avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),cnt1,avg(cd_dep_count),max(cd_dep_count),sum(cd_dep_count),cnt2,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum(cd_dep_employed_count),cnt3,avg(cd_dep_college_count),max(cd_dep_college_count),sum(cd_dep_college_count),count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + InputAdapter + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum,count,sum,count,max,sum,sum,count,max,sum,sum,count,max,sum] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] + BroadcastHashJoin [c_customer_sk,customsk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen (4) + Project [ws_bill_customer_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (6) + Project [cs_ship_customer_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/explain.txt new file mode 100644 index 0000000000..e464aa75d3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- Union (37) + :- * HashAggregate (26) + : +- Exchange (25) + : +- * HashAggregate (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.item (11) + : +- BroadcastExchange (21) + : +- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.store (17) + :- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (36) + +- Exchange (35) + +- * HashAggregate (34) + +- * HashAggregate (33) + +- ReusedExchange (32) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#8] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_state#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_state#12] + +(21) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] + +(24) HashAggregate [codegen id : 4] +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum#13, sum#14] +Results [4]: [i_category#10, i_class#9, sum#15, sum#16] + +(25) Exchange +Input [4]: [i_category#10, i_class#9, sum#15, sum#16] +Arguments: hashpartitioning(i_category#10, i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [4]: [i_category#10, i_class#9, sum#15, sum#16] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#17, sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [6]: [cast((MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#17,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2)) as decimal(38,20)) AS gross_margin#19, i_category#10, i_class#9, 0 AS t_category#20, 0 AS t_class#21, 0 AS lochierarchy#22] + +(27) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#10, i_class#9, sum#23, sum#24] + +(28) HashAggregate [codegen id : 10] +Input [4]: [i_category#10, i_class#9, sum#23, sum#24] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28, i_category#10] + +(29) HashAggregate [codegen id : 10] +Input [3]: [ss_net_profit#27, ss_ext_sales_price#28, i_category#10] +Keys [1]: [i_category#10] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#29, isEmpty#30, sum#31, isEmpty#32] +Results [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] + +(30) Exchange +Input [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] +Arguments: hashpartitioning(i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 11] +Input [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] +Keys [1]: [i_category#10] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#37, sum(ss_ext_sales_price#28)#38] +Results [6]: [cast((sum(ss_net_profit#27)#37 / sum(ss_ext_sales_price#28)#38) as decimal(38,20)) AS gross_margin#39, i_category#10, null AS i_class#40, 0 AS t_category#41, 1 AS t_class#42, 1 AS lochierarchy#43] + +(32) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#10, i_class#9, sum#44, sum#45] + +(33) HashAggregate [codegen id : 16] +Input [4]: [i_category#10, i_class#9, sum#44, sum#45] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28] + +(34) HashAggregate [codegen id : 16] +Input [2]: [ss_net_profit#27, ss_ext_sales_price#28] +Keys: [] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#46, isEmpty#47, sum#48, isEmpty#49] +Results [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] + +(35) Exchange +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(36) HashAggregate [codegen id : 17] +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] +Keys: [] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#54, sum(ss_ext_sales_price#28)#55] +Results [6]: [cast((sum(ss_net_profit#27)#54 / sum(ss_ext_sales_price#28)#55) as decimal(38,20)) AS gross_margin#56, null AS i_category#57, null AS i_class#58, 1 AS t_category#59, 1 AS t_class#60, 2 AS lochierarchy#61] + +(37) Union + +(38) HashAggregate [codegen id : 18] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] + +(39) Exchange +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Arguments: hashpartitioning(gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(40) HashAggregate [codegen id : 19] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Functions: [] +Aggregate Attributes: [] +Results [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, CASE WHEN (t_class#21 = 0) THEN i_category#10 END AS _w0#62] + +(41) Exchange +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: hashpartitioning(lochierarchy#22, _w0#62, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(42) Sort [codegen id : 20] +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: [lochierarchy#22 ASC NULLS FIRST, _w0#62 ASC NULLS FIRST, gross_margin#19 ASC NULLS FIRST], false, 0 + +(43) Window +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: [rank(gross_margin#19) windowspecdefinition(lochierarchy#22, _w0#62, gross_margin#19 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#63], [lochierarchy#22, _w0#62], [gross_margin#19 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] +Input [6]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62, rank_within_parent#63] + +(45) TakeOrderedAndProject +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] +Arguments: 100, [lochierarchy#22 DESC NULLS LAST, CASE WHEN (lochierarchy#22 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#63 ASC NULLS FIRST], [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..ad136a38ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_datafusion/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (21) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [gross_margin,lochierarchy,_w0] + WholeStageCodegen (20) + Sort [lochierarchy,_w0,gross_margin] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (19) + HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] [_w0] + InputAdapter + Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 + WholeStageCodegen (18) + HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] + InputAdapter + Exchange [i_category,i_class] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + WholeStageCodegen (11) + HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (10) + HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #8 + WholeStageCodegen (16) + HashAggregate [ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e464aa75d3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- Union (37) + :- * HashAggregate (26) + : +- Exchange (25) + : +- * HashAggregate (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.item (11) + : +- BroadcastExchange (21) + : +- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.store (17) + :- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (36) + +- Exchange (35) + +- * HashAggregate (34) + +- * HashAggregate (33) + +- ReusedExchange (32) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, ss_sold_date_sk#5, d_date_sk#6] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#8, i_class#9, i_category#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Condition : isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [3]: [i_item_sk#8, i_class#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#8] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_state#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_state#12] + +(21) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] + +(24) HashAggregate [codegen id : 4] +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum#13, sum#14] +Results [4]: [i_category#10, i_class#9, sum#15, sum#16] + +(25) Exchange +Input [4]: [i_category#10, i_class#9, sum#15, sum#16] +Arguments: hashpartitioning(i_category#10, i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(26) HashAggregate [codegen id : 5] +Input [4]: [i_category#10, i_class#9, sum#15, sum#16] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#17, sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [6]: [cast((MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#17,17,2) / MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2)) as decimal(38,20)) AS gross_margin#19, i_category#10, i_class#9, 0 AS t_category#20, 0 AS t_class#21, 0 AS lochierarchy#22] + +(27) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#10, i_class#9, sum#23, sum#24] + +(28) HashAggregate [codegen id : 10] +Input [4]: [i_category#10, i_class#9, sum#23, sum#24] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28, i_category#10] + +(29) HashAggregate [codegen id : 10] +Input [3]: [ss_net_profit#27, ss_ext_sales_price#28, i_category#10] +Keys [1]: [i_category#10] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#29, isEmpty#30, sum#31, isEmpty#32] +Results [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] + +(30) Exchange +Input [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] +Arguments: hashpartitioning(i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(31) HashAggregate [codegen id : 11] +Input [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] +Keys [1]: [i_category#10] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#37, sum(ss_ext_sales_price#28)#38] +Results [6]: [cast((sum(ss_net_profit#27)#37 / sum(ss_ext_sales_price#28)#38) as decimal(38,20)) AS gross_margin#39, i_category#10, null AS i_class#40, 0 AS t_category#41, 1 AS t_class#42, 1 AS lochierarchy#43] + +(32) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#10, i_class#9, sum#44, sum#45] + +(33) HashAggregate [codegen id : 16] +Input [4]: [i_category#10, i_class#9, sum#44, sum#45] +Keys [2]: [i_category#10, i_class#9] +Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28] + +(34) HashAggregate [codegen id : 16] +Input [2]: [ss_net_profit#27, ss_ext_sales_price#28] +Keys: [] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#46, isEmpty#47, sum#48, isEmpty#49] +Results [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] + +(35) Exchange +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] + +(36) HashAggregate [codegen id : 17] +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] +Keys: [] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#54, sum(ss_ext_sales_price#28)#55] +Results [6]: [cast((sum(ss_net_profit#27)#54 / sum(ss_ext_sales_price#28)#55) as decimal(38,20)) AS gross_margin#56, null AS i_category#57, null AS i_class#58, 1 AS t_category#59, 1 AS t_class#60, 2 AS lochierarchy#61] + +(37) Union + +(38) HashAggregate [codegen id : 18] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Functions: [] +Aggregate Attributes: [] +Results [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] + +(39) Exchange +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Arguments: hashpartitioning(gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(40) HashAggregate [codegen id : 19] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Functions: [] +Aggregate Attributes: [] +Results [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, CASE WHEN (t_class#21 = 0) THEN i_category#10 END AS _w0#62] + +(41) Exchange +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: hashpartitioning(lochierarchy#22, _w0#62, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(42) Sort [codegen id : 20] +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: [lochierarchy#22 ASC NULLS FIRST, _w0#62 ASC NULLS FIRST, gross_margin#19 ASC NULLS FIRST], false, 0 + +(43) Window +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: [rank(gross_margin#19) windowspecdefinition(lochierarchy#22, _w0#62, gross_margin#19 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#63], [lochierarchy#22, _w0#62], [gross_margin#19 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] +Input [6]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62, rank_within_parent#63] + +(45) TakeOrderedAndProject +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] +Arguments: 100, [lochierarchy#22 DESC NULLS LAST, CASE WHEN (lochierarchy#22 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#63 ASC NULLS FIRST], [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..ad136a38ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.native_iceberg_compat/simplified.txt @@ -0,0 +1,74 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (21) + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [gross_margin,lochierarchy,_w0] + WholeStageCodegen (20) + Sort [lochierarchy,_w0,gross_margin] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (19) + HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] [_w0] + InputAdapter + Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 + WholeStageCodegen (18) + HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + InputAdapter + Union + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] + InputAdapter + Exchange [i_category,i_class] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + WholeStageCodegen (11) + HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (10) + HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 + WholeStageCodegen (17) + HashAggregate [sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #8 + WholeStageCodegen (16) + HashAggregate [ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/explain.txt new file mode 100644 index 0000000000..4a4b596795 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.store (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(7) BroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#14] +Results [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#16] +Results [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS _w0#18] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(30) Filter [codegen id : 22] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN ((abs((sum_sales#17 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] + +(32) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] + +(33) HashAggregate [codegen id : 12] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] +Keys [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26] +Functions [1]: [sum(UnscaledValue(ss_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#28))#16] +Results [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#28))#16,17,2) AS sum_sales#17] + +(34) Exchange +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, s_store_name#23 ASC NULLS FIRST, s_company_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST], false, 0 + +(36) Window +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [rank(d_year#25, d_moy#26) windowspecdefinition(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#29], [i_category#21, i_brand#22, s_store_name#23, s_company_name#24], [d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#17 AS sum_sales#30, rn#29] +Input [8]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17, rn#29] + +(38) BroadcastExchange +Input [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, (rn#29 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] + +(41) ReusedExchange [Reuses operator id: 34] +Output [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] + +(42) Sort [codegen id : 20] +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [i_category#31 ASC NULLS FIRST, i_brand#32 ASC NULLS FIRST, s_store_name#33 ASC NULLS FIRST, s_company_name#34 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +(43) Window +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#31, i_brand#32, s_store_name#33, s_company_name#34], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#17 AS sum_sales#38, rn#37] +Input [8]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17, rn#37] + +(45) BroadcastExchange +Input [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, (rn#37 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, sum_sales#30 AS psum#39, sum_sales#38 AS nsum#40] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30, i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] + +(48) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] +Arguments: 100, [(sum_sales#17 - avg_monthly_sales#20) ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/simplified.txt new file mode 100644 index 0000000000..32992e11f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_datafusion/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum,nsum] + WholeStageCodegen (22) + Project [i_category,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..4a4b596795 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.store (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) + +(7) BroadcastExchange +Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Condition : ((isnotnull(s_store_sk#11) AND isnotnull(s_store_name#12)) AND isnotnull(s_company_name#13)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#11, s_store_name#12, s_company_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#9, d_moy#10, s_store_sk#11, s_store_name#12, s_company_name#13] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#9, d_moy#10, s_store_name#12, s_company_name#13] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#14] +Results [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#16] +Results [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#16,17,2) AS _w0#18] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#12 ASC NULLS FIRST, s_company_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9] + +(30) Filter [codegen id : 22] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN ((abs((sum_sales#17 - avg_monthly_sales#20)) / avg_monthly_sales#20) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] + +(32) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] + +(33) HashAggregate [codegen id : 12] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum#27] +Keys [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26] +Functions [1]: [sum(UnscaledValue(ss_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#28))#16] +Results [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#28))#16,17,2) AS sum_sales#17] + +(34) Exchange +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, s_store_name#23 ASC NULLS FIRST, s_company_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST], false, 0 + +(36) Window +Input [7]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17] +Arguments: [rank(d_year#25, d_moy#26) windowspecdefinition(i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#29], [i_category#21, i_brand#22, s_store_name#23, s_company_name#24], [d_year#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#17 AS sum_sales#30, rn#29] +Input [8]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, d_year#25, d_moy#26, sum_sales#17, rn#29] + +(38) BroadcastExchange +Input [6]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#21, i_brand#22, s_store_name#23, s_company_name#24, (rn#29 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [10]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30] +Input [15]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, s_store_name#23, s_company_name#24, sum_sales#30, rn#29] + +(41) ReusedExchange [Reuses operator id: 34] +Output [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] + +(42) Sort [codegen id : 20] +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [i_category#31 ASC NULLS FIRST, i_brand#32 ASC NULLS FIRST, s_store_name#33 ASC NULLS FIRST, s_company_name#34 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +(43) Window +Input [7]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17] +Arguments: [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#31, i_brand#32, s_store_name#33, s_company_name#34], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#17 AS sum_sales#38, rn#37] +Input [8]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, d_year#35, d_moy#36, sum_sales#17, rn#37] + +(45) BroadcastExchange +Input [6]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, rn#19] +Right keys [5]: [i_category#31, i_brand#32, s_store_name#33, s_company_name#34, (rn#37 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, sum_sales#30 AS psum#39, sum_sales#38 AS nsum#40] +Input [16]: [i_category#3, i_brand#2, s_store_name#12, s_company_name#13, d_year#9, d_moy#10, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#30, i_category#31, i_brand#32, s_store_name#33, s_company_name#34, sum_sales#38, rn#37] + +(48) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] +Arguments: 100, [(sum_sales#17 - avg_monthly_sales#20) ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], [i_category#3, d_year#9, d_moy#10, avg_monthly_sales#20, sum_sales#17, psum#39, nsum#40] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..32992e11f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.native_iceberg_compat/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum,nsum] + WholeStageCodegen (22) + Project [i_category,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/explain.txt new file mode 100644 index 0000000000..5548f821e9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/explain.txt @@ -0,0 +1,457 @@ +== Physical Plan == +TakeOrderedAndProject (81) ++- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- Union (77) + :- * Project (28) + : +- * Filter (27) + : +- Window (26) + : +- * Sort (25) + : +- Window (24) + : +- * Sort (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildLeft (10) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- * Project (9) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet spark_catalog.default.web_returns (6) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + :- * Project (52) + : +- * Filter (51) + : +- Window (50) + : +- * Sort (49) + : +- Window (48) + : +- * Sort (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildLeft (38) + : : :- BroadcastExchange (33) + : : : +- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.catalog_sales (29) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.catalog_returns (34) + : +- ReusedExchange (40) + +- * Project (76) + +- * Filter (75) + +- Window (74) + +- * Sort (73) + +- Window (72) + +- * Sort (71) + +- Exchange (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildLeft (62) + : :- BroadcastExchange (57) + : : +- * Project (56) + : : +- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet spark_catalog.default.store_sales (53) + : +- * Project (61) + : +- * Filter (60) + : +- * ColumnarToRow (59) + : +- Scan parquet spark_catalog.default.store_returns (58) + +- ReusedExchange (64) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#6)] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(4) Project [codegen id : 1] +Output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(5) BroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=1] + +(6) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(8) Filter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(9) Project +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [ws_order_number#2, ws_item_sk#1] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(12) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(14) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(16) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(19) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] + +(20) Exchange +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#9, 0))#27, sum(coalesce(ws_quantity#3, 0))#28, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30] +Results [3]: [ws_item_sk#1 AS item#31, (cast(sum(coalesce(wr_return_quantity#9, 0))#27 as decimal(15,4)) / cast(sum(coalesce(ws_quantity#3, 0))#28 as decimal(15,4))) AS return_ratio#32, (cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29 as decimal(15,4)) / cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30 as decimal(15,4))) AS currency_ratio#33] + +(22) Exchange +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(23) Sort [codegen id : 5] +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [return_ratio#32 ASC NULLS FIRST], false, 0 + +(24) Window +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [rank(return_ratio#32) windowspecdefinition(return_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#34], [return_ratio#32 ASC NULLS FIRST] + +(25) Sort [codegen id : 6] +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [currency_ratio#33 ASC NULLS FIRST], false, 0 + +(26) Window +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [rank(currency_ratio#33) windowspecdefinition(currency_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#35], [currency_ratio#33 ASC NULLS FIRST] + +(27) Filter [codegen id : 7] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] +Condition : ((return_rank#34 <= 10) OR (currency_rank#35 <= 10)) + +(28) Project [codegen id : 7] +Output [5]: [web AS channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] + +(29) Scan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#42)] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(31) Filter [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Condition : (((((((isnotnull(cs_net_profit#41) AND isnotnull(cs_net_paid#40)) AND isnotnull(cs_quantity#39)) AND (cs_net_profit#41 > 1.00)) AND (cs_net_paid#40 > 0.00)) AND (cs_quantity#39 > 0)) AND isnotnull(cs_order_number#38)) AND isnotnull(cs_item_sk#37)) + +(32) Project [codegen id : 8] +Output [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(33) BroadcastExchange +Input [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=5] + +(34) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(36) Filter +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Condition : (((isnotnull(cr_return_amount#46) AND (cr_return_amount#46 > 10000.00)) AND isnotnull(cr_order_number#44)) AND isnotnull(cr_item_sk#43)) + +(37) Project +Output [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#38, cs_item_sk#37] +Right keys [2]: [cr_order_number#44, cr_item_sk#43] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [6]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46] +Input [9]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] + +(40) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#48] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#42] +Right keys [1]: [d_date_sk#48] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Input [7]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46, d_date_sk#48] + +(43) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Keys [1]: [cs_item_sk#37] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#45, 0)), partial_sum(coalesce(cs_quantity#39, 0)), partial_sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#49, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Results [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] + +(44) Exchange +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: hashpartitioning(cs_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Keys [1]: [cs_item_sk#37] +Functions [4]: [sum(coalesce(cr_return_quantity#45, 0)), sum(coalesce(cs_quantity#39, 0)), sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#45, 0))#61, sum(coalesce(cs_quantity#39, 0))#62, sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63, sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64] +Results [3]: [cs_item_sk#37 AS item#65, (cast(sum(coalesce(cr_return_quantity#45, 0))#61 as decimal(15,4)) / cast(sum(coalesce(cs_quantity#39, 0))#62 as decimal(15,4))) AS return_ratio#66, (cast(sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63 as decimal(15,4)) / cast(sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64 as decimal(15,4))) AS currency_ratio#67] + +(46) Exchange +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(47) Sort [codegen id : 12] +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [return_ratio#66 ASC NULLS FIRST], false, 0 + +(48) Window +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [rank(return_ratio#66) windowspecdefinition(return_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#68], [return_ratio#66 ASC NULLS FIRST] + +(49) Sort [codegen id : 13] +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [currency_ratio#67 ASC NULLS FIRST], false, 0 + +(50) Window +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [rank(currency_ratio#67) windowspecdefinition(currency_ratio#67 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#69], [currency_ratio#67 ASC NULLS FIRST] + +(51) Filter [codegen id : 14] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] +Condition : ((return_rank#68 <= 10) OR (currency_rank#69 <= 10)) + +(52) Project [codegen id : 14] +Output [5]: [catalog AS channel#70, item#65, return_ratio#66, return_rank#68, currency_rank#69] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] + +(53) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#76)] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(55) Filter [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Condition : (((((((isnotnull(ss_net_profit#75) AND isnotnull(ss_net_paid#74)) AND isnotnull(ss_quantity#73)) AND (ss_net_profit#75 > 1.00)) AND (ss_net_paid#74 > 0.00)) AND (ss_quantity#73 > 0)) AND isnotnull(ss_ticket_number#72)) AND isnotnull(ss_item_sk#71)) + +(56) Project [codegen id : 15] +Output [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(57) BroadcastExchange +Input [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=8] + +(58) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(60) Filter +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Condition : (((isnotnull(sr_return_amt#80) AND (sr_return_amt#80 > 10000.00)) AND isnotnull(sr_ticket_number#78)) AND isnotnull(sr_item_sk#77)) + +(61) Project +Output [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [ss_ticket_number#72, ss_item_sk#71] +Right keys [2]: [sr_ticket_number#78, sr_item_sk#77] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80] +Input [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] + +(64) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#82] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#76] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Input [7]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80, d_date_sk#82] + +(67) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Keys [1]: [ss_item_sk#71] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#79, 0)), partial_sum(coalesce(ss_quantity#73, 0)), partial_sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#83, sum#84, sum#85, isEmpty#86, sum#87, isEmpty#88] +Results [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] + +(68) Exchange +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Arguments: hashpartitioning(ss_item_sk#71, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(69) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Keys [1]: [ss_item_sk#71] +Functions [4]: [sum(coalesce(sr_return_quantity#79, 0)), sum(coalesce(ss_quantity#73, 0)), sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#79, 0))#95, sum(coalesce(ss_quantity#73, 0))#96, sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97, sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98] +Results [3]: [ss_item_sk#71 AS item#99, (cast(sum(coalesce(sr_return_quantity#79, 0))#95 as decimal(15,4)) / cast(sum(coalesce(ss_quantity#73, 0))#96 as decimal(15,4))) AS return_ratio#100, (cast(sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97 as decimal(15,4)) / cast(sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98 as decimal(15,4))) AS currency_ratio#101] + +(70) Exchange +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(71) Sort [codegen id : 19] +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [return_ratio#100 ASC NULLS FIRST], false, 0 + +(72) Window +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [rank(return_ratio#100) windowspecdefinition(return_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#102], [return_ratio#100 ASC NULLS FIRST] + +(73) Sort [codegen id : 20] +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [currency_ratio#101 ASC NULLS FIRST], false, 0 + +(74) Window +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [rank(currency_ratio#101) windowspecdefinition(currency_ratio#101 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#103], [currency_ratio#101 ASC NULLS FIRST] + +(75) Filter [codegen id : 21] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] +Condition : ((return_rank#102 <= 10) OR (currency_rank#103 <= 10)) + +(76) Project [codegen id : 21] +Output [5]: [store AS channel#104, item#99, return_ratio#100, return_rank#102, currency_rank#103] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] + +(77) Union + +(78) HashAggregate [codegen id : 22] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(79) Exchange +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: hashpartitioning(channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 23] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(81) TakeOrderedAndProject +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: 100, [channel#36 ASC NULLS FIRST, return_rank#34 ASC NULLS FIRST, currency_rank#35 ASC NULLS FIRST, item#31 ASC NULLS FIRST], [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/simplified.txt new file mode 100644 index 0000000000..018748b274 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_datafusion/simplified.txt @@ -0,0 +1,129 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (22) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (7) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + Filter [ws_net_profit,ws_net_paid,ws_quantity,ws_order_number,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_return_amt,wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (14) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(cr_return_quantity, 0)),sum(coalesce(cs_quantity, 0)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate [cs_item_sk,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_order_number,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(sr_return_quantity, 0)),sum(coalesce(ss_quantity, 0)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [ss_item_sk,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] + Filter [ss_net_profit,ss_net_paid,ss_quantity,ss_ticket_number,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..5548f821e9 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/explain.txt @@ -0,0 +1,457 @@ +== Physical Plan == +TakeOrderedAndProject (81) ++- * HashAggregate (80) + +- Exchange (79) + +- * HashAggregate (78) + +- Union (77) + :- * Project (28) + : +- * Filter (27) + : +- Window (26) + : +- * Sort (25) + : +- Window (24) + : +- * Sort (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- Exchange (20) + : +- * HashAggregate (19) + : +- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildLeft (10) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- * Project (9) + : : +- * Filter (8) + : : +- * ColumnarToRow (7) + : : +- Scan parquet spark_catalog.default.web_returns (6) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet spark_catalog.default.date_dim (12) + :- * Project (52) + : +- * Filter (51) + : +- Window (50) + : +- * Sort (49) + : +- Window (48) + : +- * Sort (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildLeft (38) + : : :- BroadcastExchange (33) + : : : +- * Project (32) + : : : +- * Filter (31) + : : : +- * ColumnarToRow (30) + : : : +- Scan parquet spark_catalog.default.catalog_sales (29) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.catalog_returns (34) + : +- ReusedExchange (40) + +- * Project (76) + +- * Filter (75) + +- Window (74) + +- * Sort (73) + +- Window (72) + +- * Sort (71) + +- Exchange (70) + +- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildLeft (62) + : :- BroadcastExchange (57) + : : +- * Project (56) + : : +- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet spark_catalog.default.store_sales (53) + : +- * Project (61) + : +- * Filter (60) + : +- * ColumnarToRow (59) + : +- Scan parquet spark_catalog.default.store_returns (58) + +- ReusedExchange (64) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#6)] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(3) Filter [codegen id : 1] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] +Condition : (((((((isnotnull(ws_net_profit#5) AND isnotnull(ws_net_paid#4)) AND isnotnull(ws_quantity#3)) AND (ws_net_profit#5 > 1.00)) AND (ws_net_paid#4 > 0.00)) AND (ws_quantity#3 > 0)) AND isnotnull(ws_order_number#2)) AND isnotnull(ws_item_sk#1)) + +(4) Project [codegen id : 1] +Output [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_net_profit#5, ws_sold_date_sk#6] + +(5) BroadcastExchange +Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=1] + +(6) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(8) Filter +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(9) Project +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Input [5]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10, wr_returned_date_sk#11] + +(10) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [ws_order_number#2, ws_item_sk#1] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join type: Inner +Join condition: None + +(11) Project [codegen id : 3] +Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10] +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(12) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(14) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(16) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(19) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#1] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#9, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] +Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] + +(20) Exchange +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(21) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Keys [1]: [ws_item_sk#1] +Functions [4]: [sum(coalesce(wr_return_quantity#9, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#9, 0))#27, sum(coalesce(ws_quantity#3, 0))#28, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30] +Results [3]: [ws_item_sk#1 AS item#31, (cast(sum(coalesce(wr_return_quantity#9, 0))#27 as decimal(15,4)) / cast(sum(coalesce(ws_quantity#3, 0))#28 as decimal(15,4))) AS return_ratio#32, (cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#29 as decimal(15,4)) / cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#30 as decimal(15,4))) AS currency_ratio#33] + +(22) Exchange +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(23) Sort [codegen id : 5] +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [return_ratio#32 ASC NULLS FIRST], false, 0 + +(24) Window +Input [3]: [item#31, return_ratio#32, currency_ratio#33] +Arguments: [rank(return_ratio#32) windowspecdefinition(return_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#34], [return_ratio#32 ASC NULLS FIRST] + +(25) Sort [codegen id : 6] +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [currency_ratio#33 ASC NULLS FIRST], false, 0 + +(26) Window +Input [4]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34] +Arguments: [rank(currency_ratio#33) windowspecdefinition(currency_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#35], [currency_ratio#33 ASC NULLS FIRST] + +(27) Filter [codegen id : 7] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] +Condition : ((return_rank#34 <= 10) OR (currency_rank#35 <= 10)) + +(28) Project [codegen id : 7] +Output [5]: [web AS channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Input [5]: [item#31, return_ratio#32, currency_ratio#33, return_rank#34, currency_rank#35] + +(29) Scan parquet spark_catalog.default.catalog_sales +Output [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#42)] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(31) Filter [codegen id : 8] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] +Condition : (((((((isnotnull(cs_net_profit#41) AND isnotnull(cs_net_paid#40)) AND isnotnull(cs_quantity#39)) AND (cs_net_profit#41 > 1.00)) AND (cs_net_paid#40 > 0.00)) AND (cs_quantity#39 > 0)) AND isnotnull(cs_order_number#38)) AND isnotnull(cs_item_sk#37)) + +(32) Project [codegen id : 8] +Output [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Input [6]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_net_profit#41, cs_sold_date_sk#42] + +(33) BroadcastExchange +Input [5]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=5] + +(34) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(35) ColumnarToRow +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(36) Filter +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Condition : (((isnotnull(cr_return_amount#46) AND (cr_return_amount#46 > 10000.00)) AND isnotnull(cr_order_number#44)) AND isnotnull(cr_item_sk#43)) + +(37) Project +Output [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#38, cs_item_sk#37] +Right keys [2]: [cr_order_number#44, cr_item_sk#43] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [6]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46] +Input [9]: [cs_item_sk#37, cs_order_number#38, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] + +(40) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#48] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#42] +Right keys [1]: [d_date_sk#48] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Input [7]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cs_sold_date_sk#42, cr_return_quantity#45, cr_return_amount#46, d_date_sk#48] + +(43) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#37, cs_quantity#39, cs_net_paid#40, cr_return_quantity#45, cr_return_amount#46] +Keys [1]: [cs_item_sk#37] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#45, 0)), partial_sum(coalesce(cs_quantity#39, 0)), partial_sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#49, sum#50, sum#51, isEmpty#52, sum#53, isEmpty#54] +Results [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] + +(44) Exchange +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: hashpartitioning(cs_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(45) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#37, sum#55, sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Keys [1]: [cs_item_sk#37] +Functions [4]: [sum(coalesce(cr_return_quantity#45, 0)), sum(coalesce(cs_quantity#39, 0)), sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#45, 0))#61, sum(coalesce(cs_quantity#39, 0))#62, sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63, sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64] +Results [3]: [cs_item_sk#37 AS item#65, (cast(sum(coalesce(cr_return_quantity#45, 0))#61 as decimal(15,4)) / cast(sum(coalesce(cs_quantity#39, 0))#62 as decimal(15,4))) AS return_ratio#66, (cast(sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#63 as decimal(15,4)) / cast(sum(coalesce(cast(cs_net_paid#40 as decimal(12,2)), 0.00))#64 as decimal(15,4))) AS currency_ratio#67] + +(46) Exchange +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] + +(47) Sort [codegen id : 12] +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [return_ratio#66 ASC NULLS FIRST], false, 0 + +(48) Window +Input [3]: [item#65, return_ratio#66, currency_ratio#67] +Arguments: [rank(return_ratio#66) windowspecdefinition(return_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#68], [return_ratio#66 ASC NULLS FIRST] + +(49) Sort [codegen id : 13] +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [currency_ratio#67 ASC NULLS FIRST], false, 0 + +(50) Window +Input [4]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68] +Arguments: [rank(currency_ratio#67) windowspecdefinition(currency_ratio#67 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#69], [currency_ratio#67 ASC NULLS FIRST] + +(51) Filter [codegen id : 14] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] +Condition : ((return_rank#68 <= 10) OR (currency_rank#69 <= 10)) + +(52) Project [codegen id : 14] +Output [5]: [catalog AS channel#70, item#65, return_ratio#66, return_rank#68, currency_rank#69] +Input [5]: [item#65, return_ratio#66, currency_ratio#67, return_rank#68, currency_rank#69] + +(53) Scan parquet spark_catalog.default.store_sales +Output [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#76)] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(55) Filter [codegen id : 15] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] +Condition : (((((((isnotnull(ss_net_profit#75) AND isnotnull(ss_net_paid#74)) AND isnotnull(ss_quantity#73)) AND (ss_net_profit#75 > 1.00)) AND (ss_net_paid#74 > 0.00)) AND (ss_quantity#73 > 0)) AND isnotnull(ss_ticket_number#72)) AND isnotnull(ss_item_sk#71)) + +(56) Project [codegen id : 15] +Output [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Input [6]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_net_profit#75, ss_sold_date_sk#76] + +(57) BroadcastExchange +Input [5]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=8] + +(58) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(60) Filter +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Condition : (((isnotnull(sr_return_amt#80) AND (sr_return_amt#80 > 10000.00)) AND isnotnull(sr_ticket_number#78)) AND isnotnull(sr_item_sk#77)) + +(61) Project +Output [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [ss_ticket_number#72, ss_item_sk#71] +Right keys [2]: [sr_ticket_number#78, sr_item_sk#77] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 17] +Output [6]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80] +Input [9]: [ss_item_sk#71, ss_ticket_number#72, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] + +(64) ReusedExchange [Reuses operator id: 16] +Output [1]: [d_date_sk#82] + +(65) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#76] +Right keys [1]: [d_date_sk#82] +Join type: Inner +Join condition: None + +(66) Project [codegen id : 17] +Output [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Input [7]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, ss_sold_date_sk#76, sr_return_quantity#79, sr_return_amt#80, d_date_sk#82] + +(67) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#71, ss_quantity#73, ss_net_paid#74, sr_return_quantity#79, sr_return_amt#80] +Keys [1]: [ss_item_sk#71] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#79, 0)), partial_sum(coalesce(ss_quantity#73, 0)), partial_sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#83, sum#84, sum#85, isEmpty#86, sum#87, isEmpty#88] +Results [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] + +(68) Exchange +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Arguments: hashpartitioning(ss_item_sk#71, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(69) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#71, sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] +Keys [1]: [ss_item_sk#71] +Functions [4]: [sum(coalesce(sr_return_quantity#79, 0)), sum(coalesce(ss_quantity#73, 0)), sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#79, 0))#95, sum(coalesce(ss_quantity#73, 0))#96, sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97, sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98] +Results [3]: [ss_item_sk#71 AS item#99, (cast(sum(coalesce(sr_return_quantity#79, 0))#95 as decimal(15,4)) / cast(sum(coalesce(ss_quantity#73, 0))#96 as decimal(15,4))) AS return_ratio#100, (cast(sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#97 as decimal(15,4)) / cast(sum(coalesce(cast(ss_net_paid#74 as decimal(12,2)), 0.00))#98 as decimal(15,4))) AS currency_ratio#101] + +(70) Exchange +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] + +(71) Sort [codegen id : 19] +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [return_ratio#100 ASC NULLS FIRST], false, 0 + +(72) Window +Input [3]: [item#99, return_ratio#100, currency_ratio#101] +Arguments: [rank(return_ratio#100) windowspecdefinition(return_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#102], [return_ratio#100 ASC NULLS FIRST] + +(73) Sort [codegen id : 20] +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [currency_ratio#101 ASC NULLS FIRST], false, 0 + +(74) Window +Input [4]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102] +Arguments: [rank(currency_ratio#101) windowspecdefinition(currency_ratio#101 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#103], [currency_ratio#101 ASC NULLS FIRST] + +(75) Filter [codegen id : 21] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] +Condition : ((return_rank#102 <= 10) OR (currency_rank#103 <= 10)) + +(76) Project [codegen id : 21] +Output [5]: [store AS channel#104, item#99, return_ratio#100, return_rank#102, currency_rank#103] +Input [5]: [item#99, return_ratio#100, currency_ratio#101, return_rank#102, currency_rank#103] + +(77) Union + +(78) HashAggregate [codegen id : 22] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(79) Exchange +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: hashpartitioning(channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 23] +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Keys [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + +(81) TakeOrderedAndProject +Input [5]: [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] +Arguments: 100, [channel#36 ASC NULLS FIRST, return_rank#34 ASC NULLS FIRST, currency_rank#35 ASC NULLS FIRST, item#31 ASC NULLS FIRST], [channel#36, item#31, return_ratio#32, return_rank#34, currency_rank#35] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..018748b274 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.native_iceberg_compat/simplified.txt @@ -0,0 +1,129 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (22) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (7) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(wr_return_quantity, 0)),sum(coalesce(ws_quantity, 0)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_quantity,ws_net_paid,ws_sold_date_sk,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_sold_date_sk] + Filter [ws_net_profit,ws_net_paid,ws_quantity,ws_order_number,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit,ws_sold_date_sk] + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_return_amt,wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (14) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(cr_return_quantity, 0)),sum(coalesce(cs_quantity, 0)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate [cs_item_sk,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_quantity,cs_net_paid,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_sold_date_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_order_number,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit,cs_sold_date_sk] + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(coalesce(sr_return_quantity, 0)),sum(coalesce(ss_quantity, 0)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [ss_item_sk,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_quantity,ss_net_paid,ss_sold_date_sk,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_sold_date_sk] + Filter [ss_net_profit,ss_net_paid,ss_quantity,ss_ticket_number,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/explain.txt new file mode 100644 index 0000000000..40f1f3e69c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/explain.txt @@ -0,0 +1,404 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- * HashAggregate (69) + +- * HashAggregate (68) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- Window (60) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * SortMergeJoin FullOuter (55) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * Project (17) + : : : +- Window (16) + : : : +- * Sort (15) + : : : +- Exchange (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- Window (20) + : : +- * Sort (19) + : : +- ReusedExchange (18) + : +- * Sort (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- Exchange (51) + : +- * HashAggregate (50) + : +- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (42) + : : +- Window (41) + : : +- * Sort (40) + : : +- Exchange (39) + : : +- * HashAggregate (38) + : : +- Exchange (37) + : : +- * HashAggregate (36) + : : +- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet spark_catalog.default.store_sales (30) + : : +- ReusedExchange (33) + : +- BroadcastExchange (47) + : +- * Project (46) + : +- Window (45) + : +- * Sort (44) + : +- ReusedExchange (43) + +- BroadcastExchange (65) + +- * Project (64) + +- Window (63) + +- * Sort (62) + +- ReusedExchange (61) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [ws_item_sk#1, d_date#5, sum#8] + +(12) Exchange +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS sumws#11, ws_item_sk#1] + +(14) Exchange +Input [4]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1] +Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1] +Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#12], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 10] +Output [4]: [item_sk#10, d_date#5, sumws#11, rk#12] +Input [5]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1, rk#12] + +(18) ReusedExchange [Reuses operator id: 14] +Output [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] + +(19) Sort [codegen id : 8] +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [ws_item_sk#14 ASC NULLS FIRST, d_date#13 ASC NULLS FIRST], false, 0 + +(20) Window +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [row_number() windowspecdefinition(ws_item_sk#14, d_date#13 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#15], [ws_item_sk#14], [d_date#13 ASC NULLS FIRST] + +(21) Project [codegen id : 9] +Output [3]: [item_sk#10 AS item_sk#16, sumws#11 AS sumws#17, rk#15] +Input [5]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14, rk#15] + +(22) BroadcastExchange +Input [3]: [item_sk#16, sumws#17, rk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#10] +Right keys [1]: [item_sk#16] +Join type: Inner +Join condition: (rk#12 >= rk#15) + +(24) Project [codegen id : 10] +Output [4]: [item_sk#10, d_date#5, sumws#11, sumws#17] +Input [7]: [item_sk#10, d_date#5, sumws#11, rk#12, item_sk#16, sumws#17, rk#15] + +(25) HashAggregate [codegen id : 10] +Input [4]: [item_sk#10, d_date#5, sumws#11, sumws#17] +Keys [3]: [item_sk#10, d_date#5, sumws#11] +Functions [1]: [partial_sum(sumws#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [5]: [item_sk#10, d_date#5, sumws#11, sum#20, isEmpty#21] + +(26) Exchange +Input [5]: [item_sk#10, d_date#5, sumws#11, sum#20, isEmpty#21] +Arguments: hashpartitioning(item_sk#10, d_date#5, sumws#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(27) HashAggregate [codegen id : 11] +Input [5]: [item_sk#10, d_date#5, sumws#11, sum#20, isEmpty#21] +Keys [3]: [item_sk#10, d_date#5, sumws#11] +Functions [1]: [sum(sumws#17)] +Aggregate Attributes [1]: [sum(sumws#17)#22] +Results [3]: [item_sk#10, d_date#5, sum(sumws#17)#22 AS cume_sales#23] + +(28) Exchange +Input [3]: [item_sk#10, d_date#5, cume_sales#23] +Arguments: hashpartitioning(item_sk#10, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(29) Sort [codegen id : 12] +Input [3]: [item_sk#10, d_date#5, cume_sales#23] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 14] +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] + +(32) Filter [codegen id : 14] +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_item_sk#24) + +(33) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#27, d_date#28] + +(34) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 14] +Output [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Input [5]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_date#28] + +(36) HashAggregate [codegen id : 14] +Input [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#29] +Results [3]: [ss_item_sk#24, d_date#28, sum#30] + +(37) Exchange +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Arguments: hashpartitioning(ss_item_sk#24, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(38) HashAggregate [codegen id : 15] +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#31] +Results [4]: [ss_item_sk#24 AS item_sk#32, d_date#28, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#31,17,2) AS sumss#33, ss_item_sk#24] + +(39) Exchange +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: hashpartitioning(ss_item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(40) Sort [codegen id : 16] +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [ss_item_sk#24 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 + +(41) Window +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [row_number() windowspecdefinition(ss_item_sk#24, d_date#28 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#34], [ss_item_sk#24], [d_date#28 ASC NULLS FIRST] + +(42) Project [codegen id : 22] +Output [4]: [item_sk#32, d_date#28, sumss#33, rk#34] +Input [5]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24, rk#34] + +(43) ReusedExchange [Reuses operator id: 39] +Output [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] + +(44) Sort [codegen id : 20] +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [ss_item_sk#36 ASC NULLS FIRST, d_date#35 ASC NULLS FIRST], false, 0 + +(45) Window +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [row_number() windowspecdefinition(ss_item_sk#36, d_date#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#37], [ss_item_sk#36], [d_date#35 ASC NULLS FIRST] + +(46) Project [codegen id : 21] +Output [3]: [item_sk#32 AS item_sk#38, sumss#33 AS sumss#39, rk#37] +Input [5]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36, rk#37] + +(47) BroadcastExchange +Input [3]: [item_sk#38, sumss#39, rk#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] + +(48) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [item_sk#32] +Right keys [1]: [item_sk#38] +Join type: Inner +Join condition: (rk#34 >= rk#37) + +(49) Project [codegen id : 22] +Output [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Input [7]: [item_sk#32, d_date#28, sumss#33, rk#34, item_sk#38, sumss#39, rk#37] + +(50) HashAggregate [codegen id : 22] +Input [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [partial_sum(sumss#39)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] + +(51) Exchange +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Arguments: hashpartitioning(item_sk#32, d_date#28, sumss#33, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(52) HashAggregate [codegen id : 23] +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [sum(sumss#39)] +Aggregate Attributes [1]: [sum(sumss#39)#44] +Results [3]: [item_sk#32, d_date#28, sum(sumss#39)#44 AS cume_sales#45] + +(53) Exchange +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: hashpartitioning(item_sk#32, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(54) Sort [codegen id : 24] +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: [item_sk#32 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 25] +Left keys [2]: [item_sk#10, d_date#5] +Right keys [2]: [item_sk#32, d_date#28] +Join type: FullOuter +Join condition: None + +(56) Filter [codegen id : 25] +Input [6]: [item_sk#10, d_date#5, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END) + +(57) Project [codegen id : 25] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END AS item_sk#46, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#28 END AS d_date#47, cume_sales#23 AS web_sales#48, cume_sales#45 AS store_sales#49] +Input [6]: [item_sk#10, d_date#5, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] + +(58) Exchange +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: hashpartitioning(item_sk#46, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(59) Sort [codegen id : 26] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 + +(60) Window +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#50], [item_sk#46], [d_date#47 ASC NULLS FIRST] + +(61) ReusedExchange [Reuses operator id: 58] +Output [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] + +(62) Sort [codegen id : 52] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 + +(63) Window +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#51], [item_sk#46], [d_date#47 ASC NULLS FIRST] + +(64) Project [codegen id : 53] +Output [4]: [item_sk#46 AS item_sk#52, web_sales#48 AS web_sales#53, store_sales#49 AS store_sales#54, rk#51] +Input [5]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#51] + +(65) BroadcastExchange +Input [4]: [item_sk#52, web_sales#53, store_sales#54, rk#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] + +(66) BroadcastHashJoin [codegen id : 54] +Left keys [1]: [item_sk#46] +Right keys [1]: [item_sk#52] +Join type: Inner +Join condition: (rk#50 >= rk#51) + +(67) Project [codegen id : 54] +Output [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Input [9]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#50, item_sk#52, web_sales#53, store_sales#54, rk#51] + +(68) HashAggregate [codegen id : 54] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [partial_max(web_sales#53), partial_max(store_sales#54)] +Aggregate Attributes [2]: [max#55, max#56] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] + +(69) HashAggregate [codegen id : 54] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [max(web_sales#53), max(store_sales#54)] +Aggregate Attributes [2]: [max(web_sales#53)#59, max(store_sales#54)#60] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max(web_sales#53)#59 AS web_cumulative#61, max(store_sales#54)#60 AS store_cumulative#62] + +(70) Filter [codegen id : 54] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Condition : ((isnotnull(web_cumulative#61) AND isnotnull(store_cumulative#62)) AND (web_cumulative#61 > store_cumulative#62)) + +(71) TakeOrderedAndProject +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Arguments: 100, [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..6256c0684c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_datafusion/simplified.txt @@ -0,0 +1,121 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (54) + Filter [web_cumulative,store_cumulative] + HashAggregate [item_sk,d_date,web_sales,store_sales,max,max] [max(web_sales),max(store_sales),web_cumulative,store_cumulative,max,max] + HashAggregate [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] [max,max,max,max] + Project [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (26) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (25) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + Filter [item_sk,item_sk] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (11) + HashAggregate [item_sk,d_date,sumws,sum,isEmpty] [sum(sumws),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumws] #3 + WholeStageCodegen (10) + HashAggregate [item_sk,d_date,sumws,sumws] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumws,sumws] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (4) + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,sumws,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #5 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [item_sk,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (8) + Sort [ws_item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,sumws,ws_item_sk] #4 + InputAdapter + WholeStageCodegen (24) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #8 + WholeStageCodegen (23) + HashAggregate [item_sk,d_date,sumss,sum,isEmpty] [sum(sumss),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumss] #9 + WholeStageCodegen (22) + HashAggregate [item_sk,d_date,sumss,sumss] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumss,sumss] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (16) + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (15) + HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,sumss,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #11 + WholeStageCodegen (14) + HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (21) + Project [item_sk,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (20) + Sort [ss_item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,sumss,ss_item_sk] #10 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (53) + Project [item_sk,web_sales,store_sales,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (52) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,web_sales,store_sales] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..40f1f3e69c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/explain.txt @@ -0,0 +1,404 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- * HashAggregate (69) + +- * HashAggregate (68) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- Window (60) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * SortMergeJoin FullOuter (55) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * HashAggregate (27) + : : +- Exchange (26) + : : +- * HashAggregate (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin Inner BuildRight (23) + : : :- * Project (17) + : : : +- Window (16) + : : : +- * Sort (15) + : : : +- Exchange (14) + : : : +- * HashAggregate (13) + : : : +- Exchange (12) + : : : +- * HashAggregate (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- Window (20) + : : +- * Sort (19) + : : +- ReusedExchange (18) + : +- * Sort (54) + : +- Exchange (53) + : +- * HashAggregate (52) + : +- Exchange (51) + : +- * HashAggregate (50) + : +- * Project (49) + : +- * BroadcastHashJoin Inner BuildRight (48) + : :- * Project (42) + : : +- Window (41) + : : +- * Sort (40) + : : +- Exchange (39) + : : +- * HashAggregate (38) + : : +- Exchange (37) + : : +- * HashAggregate (36) + : : +- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet spark_catalog.default.store_sales (30) + : : +- ReusedExchange (33) + : +- BroadcastExchange (47) + : +- * Project (46) + : +- Window (45) + : +- * Sort (44) + : +- ReusedExchange (43) + +- BroadcastExchange (65) + +- * Project (64) + +- Window (63) + +- * Sort (62) + +- ReusedExchange (61) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1212)) AND (d_month_seq#6 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Input [5]: [ws_item_sk#1, ws_sales_price#2, ws_sold_date_sk#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#1, ws_sales_price#2, d_date#5] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum#7] +Results [3]: [ws_item_sk#1, d_date#5, sum#8] + +(12) Exchange +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Arguments: hashpartitioning(ws_item_sk#1, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#1, d_date#5, sum#8] +Keys [2]: [ws_item_sk#1, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS sumws#11, ws_item_sk#1] + +(14) Exchange +Input [4]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1] +Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1] +Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#12], [ws_item_sk#1], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 10] +Output [4]: [item_sk#10, d_date#5, sumws#11, rk#12] +Input [5]: [item_sk#10, d_date#5, sumws#11, ws_item_sk#1, rk#12] + +(18) ReusedExchange [Reuses operator id: 14] +Output [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] + +(19) Sort [codegen id : 8] +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [ws_item_sk#14 ASC NULLS FIRST, d_date#13 ASC NULLS FIRST], false, 0 + +(20) Window +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [row_number() windowspecdefinition(ws_item_sk#14, d_date#13 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#15], [ws_item_sk#14], [d_date#13 ASC NULLS FIRST] + +(21) Project [codegen id : 9] +Output [3]: [item_sk#10 AS item_sk#16, sumws#11 AS sumws#17, rk#15] +Input [5]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14, rk#15] + +(22) BroadcastExchange +Input [3]: [item_sk#16, sumws#17, rk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(23) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#10] +Right keys [1]: [item_sk#16] +Join type: Inner +Join condition: (rk#12 >= rk#15) + +(24) Project [codegen id : 10] +Output [4]: [item_sk#10, d_date#5, sumws#11, sumws#17] +Input [7]: [item_sk#10, d_date#5, sumws#11, rk#12, item_sk#16, sumws#17, rk#15] + +(25) HashAggregate [codegen id : 10] +Input [4]: [item_sk#10, d_date#5, sumws#11, sumws#17] +Keys [3]: [item_sk#10, d_date#5, sumws#11] +Functions [1]: [partial_sum(sumws#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [5]: [item_sk#10, d_date#5, sumws#11, sum#20, isEmpty#21] + +(26) Exchange +Input [5]: [item_sk#10, d_date#5, sumws#11, sum#20, isEmpty#21] +Arguments: hashpartitioning(item_sk#10, d_date#5, sumws#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(27) HashAggregate [codegen id : 11] +Input [5]: [item_sk#10, d_date#5, sumws#11, sum#20, isEmpty#21] +Keys [3]: [item_sk#10, d_date#5, sumws#11] +Functions [1]: [sum(sumws#17)] +Aggregate Attributes [1]: [sum(sumws#17)#22] +Results [3]: [item_sk#10, d_date#5, sum(sumws#17)#22 AS cume_sales#23] + +(28) Exchange +Input [3]: [item_sk#10, d_date#5, cume_sales#23] +Arguments: hashpartitioning(item_sk#10, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(29) Sort [codegen id : 12] +Input [3]: [item_sk#10, d_date#5, cume_sales#23] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#26)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 14] +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] + +(32) Filter [codegen id : 14] +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_item_sk#24) + +(33) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#27, d_date#28] + +(34) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 14] +Output [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Input [5]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_date#28] + +(36) HashAggregate [codegen id : 14] +Input [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#29] +Results [3]: [ss_item_sk#24, d_date#28, sum#30] + +(37) Exchange +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Arguments: hashpartitioning(ss_item_sk#24, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(38) HashAggregate [codegen id : 15] +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#31] +Results [4]: [ss_item_sk#24 AS item_sk#32, d_date#28, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#31,17,2) AS sumss#33, ss_item_sk#24] + +(39) Exchange +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: hashpartitioning(ss_item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(40) Sort [codegen id : 16] +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [ss_item_sk#24 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 + +(41) Window +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [row_number() windowspecdefinition(ss_item_sk#24, d_date#28 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#34], [ss_item_sk#24], [d_date#28 ASC NULLS FIRST] + +(42) Project [codegen id : 22] +Output [4]: [item_sk#32, d_date#28, sumss#33, rk#34] +Input [5]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24, rk#34] + +(43) ReusedExchange [Reuses operator id: 39] +Output [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] + +(44) Sort [codegen id : 20] +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [ss_item_sk#36 ASC NULLS FIRST, d_date#35 ASC NULLS FIRST], false, 0 + +(45) Window +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [row_number() windowspecdefinition(ss_item_sk#36, d_date#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#37], [ss_item_sk#36], [d_date#35 ASC NULLS FIRST] + +(46) Project [codegen id : 21] +Output [3]: [item_sk#32 AS item_sk#38, sumss#33 AS sumss#39, rk#37] +Input [5]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36, rk#37] + +(47) BroadcastExchange +Input [3]: [item_sk#38, sumss#39, rk#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] + +(48) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [item_sk#32] +Right keys [1]: [item_sk#38] +Join type: Inner +Join condition: (rk#34 >= rk#37) + +(49) Project [codegen id : 22] +Output [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Input [7]: [item_sk#32, d_date#28, sumss#33, rk#34, item_sk#38, sumss#39, rk#37] + +(50) HashAggregate [codegen id : 22] +Input [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [partial_sum(sumss#39)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] + +(51) Exchange +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Arguments: hashpartitioning(item_sk#32, d_date#28, sumss#33, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(52) HashAggregate [codegen id : 23] +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [sum(sumss#39)] +Aggregate Attributes [1]: [sum(sumss#39)#44] +Results [3]: [item_sk#32, d_date#28, sum(sumss#39)#44 AS cume_sales#45] + +(53) Exchange +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: hashpartitioning(item_sk#32, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(54) Sort [codegen id : 24] +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: [item_sk#32 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 25] +Left keys [2]: [item_sk#10, d_date#5] +Right keys [2]: [item_sk#32, d_date#28] +Join type: FullOuter +Join condition: None + +(56) Filter [codegen id : 25] +Input [6]: [item_sk#10, d_date#5, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END) + +(57) Project [codegen id : 25] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END AS item_sk#46, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#28 END AS d_date#47, cume_sales#23 AS web_sales#48, cume_sales#45 AS store_sales#49] +Input [6]: [item_sk#10, d_date#5, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] + +(58) Exchange +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: hashpartitioning(item_sk#46, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(59) Sort [codegen id : 26] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 + +(60) Window +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#50], [item_sk#46], [d_date#47 ASC NULLS FIRST] + +(61) ReusedExchange [Reuses operator id: 58] +Output [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] + +(62) Sort [codegen id : 52] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 + +(63) Window +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#51], [item_sk#46], [d_date#47 ASC NULLS FIRST] + +(64) Project [codegen id : 53] +Output [4]: [item_sk#46 AS item_sk#52, web_sales#48 AS web_sales#53, store_sales#49 AS store_sales#54, rk#51] +Input [5]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#51] + +(65) BroadcastExchange +Input [4]: [item_sk#52, web_sales#53, store_sales#54, rk#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] + +(66) BroadcastHashJoin [codegen id : 54] +Left keys [1]: [item_sk#46] +Right keys [1]: [item_sk#52] +Join type: Inner +Join condition: (rk#50 >= rk#51) + +(67) Project [codegen id : 54] +Output [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Input [9]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#50, item_sk#52, web_sales#53, store_sales#54, rk#51] + +(68) HashAggregate [codegen id : 54] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [partial_max(web_sales#53), partial_max(store_sales#54)] +Aggregate Attributes [2]: [max#55, max#56] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] + +(69) HashAggregate [codegen id : 54] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [max(web_sales#53), max(store_sales#54)] +Aggregate Attributes [2]: [max(web_sales#53)#59, max(store_sales#54)#60] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max(web_sales#53)#59 AS web_cumulative#61, max(store_sales#54)#60 AS store_cumulative#62] + +(70) Filter [codegen id : 54] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Condition : ((isnotnull(web_cumulative#61) AND isnotnull(store_cumulative#62)) AND (web_cumulative#61 > store_cumulative#62)) + +(71) TakeOrderedAndProject +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Arguments: 100, [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..6256c0684c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.native_iceberg_compat/simplified.txt @@ -0,0 +1,121 @@ +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (54) + Filter [web_cumulative,store_cumulative] + HashAggregate [item_sk,d_date,web_sales,store_sales,max,max] [max(web_sales),max(store_sales),web_cumulative,store_cumulative,max,max] + HashAggregate [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] [max,max,max,max] + Project [item_sk,d_date,web_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (26) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen (25) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + Filter [item_sk,item_sk] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + InputAdapter + WholeStageCodegen (12) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen (11) + HashAggregate [item_sk,d_date,sumws,sum,isEmpty] [sum(sumws),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumws] #3 + WholeStageCodegen (10) + HashAggregate [item_sk,d_date,sumws,sumws] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumws,sumws] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (4) + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,sumws,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #5 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (9) + Project [item_sk,sumws,rk] + InputAdapter + Window [ws_item_sk,d_date] + WholeStageCodegen (8) + Sort [ws_item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,sumws,ws_item_sk] #4 + InputAdapter + WholeStageCodegen (24) + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #8 + WholeStageCodegen (23) + HashAggregate [item_sk,d_date,sumss,sum,isEmpty] [sum(sumss),cume_sales,sum,isEmpty] + InputAdapter + Exchange [item_sk,d_date,sumss] #9 + WholeStageCodegen (22) + HashAggregate [item_sk,d_date,sumss,sumss] [sum,isEmpty,sum,isEmpty] + Project [item_sk,d_date,sumss,sumss] + BroadcastHashJoin [item_sk,item_sk,rk,rk] + Project [item_sk,d_date,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (16) + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (15) + HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,sumss,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #11 + WholeStageCodegen (14) + HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #6 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (21) + Project [item_sk,sumss,rk] + InputAdapter + Window [ss_item_sk,d_date] + WholeStageCodegen (20) + Sort [ss_item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,sumss,ss_item_sk] #10 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (53) + Project [item_sk,web_sales,store_sales,rk] + InputAdapter + Window [item_sk,d_date] + WholeStageCodegen (52) + Sort [item_sk,d_date] + InputAdapter + ReusedExchange [item_sk,d_date,web_sales,store_sales] #1 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/explain.txt new file mode 100644 index 0000000000..1e582841be --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.call_center (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#7)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(7) BroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#11, cc_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#4] +Right keys [1]: [cc_call_center_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#15] +Results [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS sum_sales#16, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS _w0#17] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#19], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(30) Filter [codegen id : 22] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] +Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.000000)) AND CASE WHEN (avg_monthly_sales#19 > 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#19)) / avg_monthly_sales#19) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] + +(32) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] + +(33) HashAggregate [codegen id : 12] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] +Keys [5]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(cs_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#26))#15] +Results [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, MakeDecimal(sum(UnscaledValue(cs_sales_price#26))#15,17,2) AS sum_sales#16] + +(34) Exchange +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, cc_name#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, cc_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST], false, 0 + +(36) Window +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#20, i_brand#21, cc_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#20, i_brand#21, cc_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#16 AS sum_sales#28, rn#27] +Input [7]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16, rn#27] + +(38) BroadcastExchange +Input [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#20, i_brand#21, cc_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] + +(41) ReusedExchange [Reuses operator id: 34] +Output [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] + +(42) Sort [codegen id : 20] +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, cc_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +(43) Window +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#29, i_brand#30, cc_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#29, i_brand#30, cc_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#16 AS sum_sales#35, rn#34] +Input [7]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] + +(45) BroadcastExchange +Input [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#29, i_brand#30, cc_name#31, (rn#34 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, sum_sales#28 AS psum#36, sum_sales#35 AS nsum#37] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28, i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] + +(48) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, d_year#9 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/simplified.txt new file mode 100644 index 0000000000..89e676545d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_datafusion/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_moy,psum,nsum] + WholeStageCodegen (22) + Project [i_category,i_brand,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cs_item_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..1e582841be --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/explain.txt @@ -0,0 +1,269 @@ +== Physical Plan == +TakeOrderedAndProject (48) ++- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (31) + : : +- * Filter (30) + : : +- Window (29) + : : +- * Filter (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet spark_catalog.default.call_center (16) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- Window (36) + : +- * Sort (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- ReusedExchange (32) + +- BroadcastExchange (45) + +- * Project (44) + +- Window (43) + +- * Sort (42) + +- ReusedExchange (41) + + +(1) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#7)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) + +(7) BroadcastExchange +Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : ((((d_year#9 = 1999) OR ((d_year#9 = 1998) AND (d_moy#10 = 12))) OR ((d_year#9 = 2000) AND (d_moy#10 = 1))) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#8, d_year#9, d_moy#10] + +(16) Scan parquet spark_catalog.default.call_center +Output [2]: [cc_call_center_sk#11, cc_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#11, cc_name#12] +Condition : (isnotnull(cc_call_center_sk#11) AND isnotnull(cc_name#12)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#11, cc_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#4] +Right keys [1]: [cc_call_center_sk#11] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#9, d_moy#10, cc_call_center_sk#11, cc_name#12] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#9, d_moy#10, cc_name#12] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 5] +Input [6]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10] +Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#15] +Results [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS sum_sales#16, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#15,17,2) AS _w0#17] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#12, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#12 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17] +Arguments: [rank(d_year#9, d_moy#10) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#3, i_brand#2, cc_name#12], [d_year#9 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST] + +(28) Filter [codegen id : 7] +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Condition : (isnotnull(d_year#9) AND (d_year#9 = 1999)) + +(29) Window +Input [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18] +Arguments: [avg(_w0#17) windowspecdefinition(i_category#3, i_brand#2, cc_name#12, d_year#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#19], [i_category#3, i_brand#2, cc_name#12, d_year#9] + +(30) Filter [codegen id : 22] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] +Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.000000)) AND CASE WHEN (avg_monthly_sales#19 > 0.000000) THEN ((abs((sum_sales#16 - avg_monthly_sales#19)) / avg_monthly_sales#19) > 0.1000000000000000) END) + +(31) Project [codegen id : 22] +Output [8]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18] +Input [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] + +(32) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] + +(33) HashAggregate [codegen id : 12] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum#25] +Keys [5]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24] +Functions [1]: [sum(UnscaledValue(cs_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#26))#15] +Results [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, MakeDecimal(sum(UnscaledValue(cs_sales_price#26))#15,17,2) AS sum_sales#16] + +(34) Exchange +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, cc_name#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 13] +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, cc_name#22 ASC NULLS FIRST, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST], false, 0 + +(36) Window +Input [6]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16] +Arguments: [rank(d_year#23, d_moy#24) windowspecdefinition(i_category#20, i_brand#21, cc_name#22, d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#27], [i_category#20, i_brand#21, cc_name#22], [d_year#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST] + +(37) Project [codegen id : 14] +Output [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#16 AS sum_sales#28, rn#27] +Input [7]: [i_category#20, i_brand#21, cc_name#22, d_year#23, d_moy#24, sum_sales#16, rn#27] + +(38) BroadcastExchange +Input [5]: [i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=7] + +(39) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#20, i_brand#21, cc_name#22, (rn#27 + 1)] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 22] +Output [9]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28] +Input [13]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, cc_name#22, sum_sales#28, rn#27] + +(41) ReusedExchange [Reuses operator id: 34] +Output [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] + +(42) Sort [codegen id : 20] +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [i_category#29 ASC NULLS FIRST, i_brand#30 ASC NULLS FIRST, cc_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +(43) Window +Input [6]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16] +Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#29, i_brand#30, cc_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#29, i_brand#30, cc_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +(44) Project [codegen id : 21] +Output [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#16 AS sum_sales#35, rn#34] +Input [7]: [i_category#29, i_brand#30, cc_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] + +(45) BroadcastExchange +Input [5]: [i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=8] + +(46) BroadcastHashJoin [codegen id : 22] +Left keys [4]: [i_category#3, i_brand#2, cc_name#12, rn#18] +Right keys [4]: [i_category#29, i_brand#30, cc_name#31, (rn#34 - 1)] +Join type: Inner +Join condition: None + +(47) Project [codegen id : 22] +Output [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, sum_sales#28 AS psum#36, sum_sales#35 AS nsum#37] +Input [14]: [i_category#3, i_brand#2, cc_name#12, d_year#9, d_moy#10, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#28, i_category#29, i_brand#30, cc_name#31, sum_sales#35, rn#34] + +(48) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, d_year#9 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#9, d_moy#10, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..89e676545d --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.native_iceberg_compat/simplified.txt @@ -0,0 +1,79 @@ +TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_moy,psum,nsum] + WholeStageCodegen (22) + Project [i_category,i_brand,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (7) + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [cs_item_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (13) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen (12) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (21) + Project [i_category,i_brand,cc_name,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (20) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #7 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/explain.txt new file mode 100644 index 0000000000..c0cca65544 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/explain.txt @@ -0,0 +1,533 @@ +== Physical Plan == +TakeOrderedAndProject (91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- Union (87) + :- * HashAggregate (76) + : +- Exchange (75) + : +- * HashAggregate (74) + : +- Union (73) + : :- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- Union (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Project (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.store (17) + : :- * HashAggregate (46) + : : +- Exchange (45) + : : +- * HashAggregate (44) + : : +- * Project (43) + : : +- * BroadcastHashJoin Inner BuildRight (42) + : : :- * Project (37) + : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : :- Union (34) + : : : : :- * Project (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (26) + : : : : +- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet spark_catalog.default.catalog_returns (30) + : : : +- ReusedExchange (35) + : : +- BroadcastExchange (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet spark_catalog.default.catalog_page (38) + : +- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- Union (60) + : : : :- * Project (50) + : : : : +- * Filter (49) + : : : : +- * ColumnarToRow (48) + : : : : +- Scan parquet spark_catalog.default.web_sales (47) + : : : +- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildLeft (58) + : : : :- BroadcastExchange (53) + : : : : +- * ColumnarToRow (52) + : : : : +- Scan parquet spark_catalog.default.web_returns (51) + : : : +- * Project (57) + : : : +- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : +- ReusedExchange (61) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.web_site (64) + :- * HashAggregate (81) + : +- Exchange (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- ReusedExchange (77) + +- * HashAggregate (86) + +- Exchange (85) + +- * HashAggregate (84) + +- * HashAggregate (83) + +- ReusedExchange (82) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Project [codegen id : 1] +Output [6]: [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(9) Union + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 1998-08-04)) AND (d_date#22 <= 1998-08-18)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(20) BroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [s_store_sk#23] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] + +(24) Exchange +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#33, sum(UnscaledValue(return_amt#9))#34, sum(UnscaledValue(profit#8))#35, sum(UnscaledValue(net_loss#10))#36] +Results [5]: [store channel AS channel#37, concat(store, s_store_id#24) AS id#38, MakeDecimal(sum(UnscaledValue(sales_price#7))#33,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#9))#34,17,2) AS returns#40, (MakeDecimal(sum(UnscaledValue(profit#8))#35,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#36,17,2)) AS profit#41] + +(26) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(28) Filter [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(30) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#55)] +PushedFilters: [IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(32) Filter [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#62] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51] +Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, d_date_sk#62] + +(38) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Condition : isnotnull(cp_catalog_page_sk#63) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#63] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Input [7]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] + +(45) Exchange +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [catalog channel AS channel#77, concat(catalog_page, cp_catalog_page_id#64) AS id#78, MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#79, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#80, (MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2)) AS profit#81] + +(47) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#85)] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(49) Filter [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(51) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#96)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 14] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] + +(53) BroadcastExchange +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [plan_id=6] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(56) Filter +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) + +(57) Project +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#107] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#107] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91] +Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, d_date_sk#107] + +(64) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#108, web_site_id#109] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] +Condition : isnotnull(web_site_sk#108) + +(67) BroadcastExchange +Input [2]: [web_site_sk#108, web_site_id#109] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#108] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Input [7]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#108, web_site_id#109] + +(70) HashAggregate [codegen id : 18] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Keys [1]: [web_site_id#109] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] + +(71) Exchange +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#109, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#109] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [web channel AS channel#122, concat(web_site, web_site_id#109) AS id#123, MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#124, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#125, (MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2)) AS profit#126] + +(73) Union + +(74) HashAggregate [codegen id : 20] +Input [5]: [channel#37, id#38, sales#39, returns#40, profit#41] +Keys [2]: [channel#37, id#38] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132] +Results [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] + +(75) Exchange +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Arguments: hashpartitioning(channel#37, id#38, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(76) HashAggregate [codegen id : 21] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [5]: [channel#37, id#38, cast(sum(sales#39)#139 as decimal(37,2)) AS sales#142, cast(sum(returns#40)#140 as decimal(37,2)) AS returns#143, cast(sum(profit#41)#141 as decimal(38,2)) AS profit#144] + +(77) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] + +(78) HashAggregate [codegen id : 42] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [4]: [channel#37, sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] + +(79) HashAggregate [codegen id : 42] +Input [4]: [channel#37, sales#145, returns#146, profit#147] +Keys [1]: [channel#37] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#148, isEmpty#149, sum#150, isEmpty#151, sum#152, isEmpty#153] +Results [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] + +(80) Exchange +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Arguments: hashpartitioning(channel#37, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(81) HashAggregate [codegen id : 43] +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Keys [1]: [channel#37] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#160, sum(returns#146)#161, sum(profit#147)#162] +Results [5]: [channel#37, null AS id#163, sum(sales#145)#160 AS sum(sales)#164, sum(returns#146)#161 AS sum(returns)#165, sum(profit#147)#162 AS sum(profit)#166] + +(82) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] + +(83) HashAggregate [codegen id : 64] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [3]: [sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] + +(84) HashAggregate [codegen id : 64] +Input [3]: [sales#145, returns#146, profit#147] +Keys: [] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +Results [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] + +(85) Exchange +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(86) HashAggregate [codegen id : 65] +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Keys: [] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#179, sum(returns#146)#180, sum(profit#147)#181] +Results [5]: [null AS channel#182, null AS id#183, sum(sales#145)#179 AS sum(sales)#184, sum(returns#146)#180 AS sum(returns)#185, sum(profit#147)#181 AS sum(profit)#186] + +(87) Union + +(88) HashAggregate [codegen id : 66] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] + +(89) Exchange +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: hashpartitioning(channel#37, id#38, sales#142, returns#143, profit#144, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(90) HashAggregate [codegen id : 67] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] + +(91) TakeOrderedAndProject +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: 100, [channel#37 ASC NULLS FIRST, id#38 ASC NULLS FIRST], [channel#37, id#38, sales#142, returns#143, profit#144] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a67882dfb4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_datafusion/simplified.txt @@ -0,0 +1,148 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (67) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (66) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (21) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (20) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_id] #3 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #6 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #8 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (14) + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (43) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #11 + WholeStageCodegen (42) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (65) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #12 + WholeStageCodegen (64) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..c0cca65544 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/explain.txt @@ -0,0 +1,533 @@ +== Physical Plan == +TakeOrderedAndProject (91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- Union (87) + :- * HashAggregate (76) + : +- Exchange (75) + : +- * HashAggregate (74) + : +- Union (73) + : :- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- Union (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- * Project (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.store (17) + : :- * HashAggregate (46) + : : +- Exchange (45) + : : +- * HashAggregate (44) + : : +- * Project (43) + : : +- * BroadcastHashJoin Inner BuildRight (42) + : : :- * Project (37) + : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : :- Union (34) + : : : : :- * Project (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (26) + : : : : +- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet spark_catalog.default.catalog_returns (30) + : : : +- ReusedExchange (35) + : : +- BroadcastExchange (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet spark_catalog.default.catalog_page (38) + : +- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- Union (60) + : : : :- * Project (50) + : : : : +- * Filter (49) + : : : : +- * ColumnarToRow (48) + : : : : +- Scan parquet spark_catalog.default.web_sales (47) + : : : +- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildLeft (58) + : : : :- BroadcastExchange (53) + : : : : +- * ColumnarToRow (52) + : : : : +- Scan parquet spark_catalog.default.web_returns (51) + : : : +- * Project (57) + : : : +- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : +- ReusedExchange (61) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet spark_catalog.default.web_site (64) + :- * HashAggregate (81) + : +- Exchange (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- ReusedExchange (77) + +- * HashAggregate (86) + +- Exchange (85) + +- * HashAggregate (84) + +- * HashAggregate (83) + +- ReusedExchange (82) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Project [codegen id : 1] +Output [6]: [ss_store_sk#1 AS store_sk#5, ss_sold_date_sk#4 AS date_sk#6, ss_ext_sales_price#2 AS sales_price#7, ss_net_profit#3 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(5) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#14)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Condition : isnotnull(sr_store_sk#11) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#11 AS store_sk#15, sr_returned_date_sk#14 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#12 AS return_amt#19, sr_net_loss#13 AS net_loss#20] +Input [4]: [sr_store_sk#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] + +(9) Union + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 1998-08-04)) AND (d_date#22 <= 1998-08-18)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#23, s_store_id#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#23, s_store_id#24] +Condition : isnotnull(s_store_sk#23) + +(20) BroadcastExchange +Input [2]: [s_store_sk#23, s_store_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [s_store_sk#23] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#23, s_store_id#24] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#24] +Keys [1]: [s_store_id#24] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] + +(24) Exchange +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Keys [1]: [s_store_id#24] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#33, sum(UnscaledValue(return_amt#9))#34, sum(UnscaledValue(profit#8))#35, sum(UnscaledValue(net_loss#10))#36] +Results [5]: [store channel AS channel#37, concat(store, s_store_id#24) AS id#38, MakeDecimal(sum(UnscaledValue(sales_price#7))#33,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#9))#34,17,2) AS returns#40, (MakeDecimal(sum(UnscaledValue(profit#8))#35,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#10))#36,17,2)) AS profit#41] + +(26) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(28) Filter [codegen id : 7] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(30) Scan parquet spark_catalog.default.catalog_returns +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#55)] +PushedFilters: [IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(32) Filter [codegen id : 8] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#62] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51] +Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, d_date_sk#62] + +(38) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Condition : isnotnull(cp_catalog_page_sk#63) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#63] +Join type: Inner +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Input [7]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#63, cp_catalog_page_id#64] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] + +(45) Exchange +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [catalog channel AS channel#77, concat(catalog_page, cp_catalog_page_id#64) AS id#78, MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#79, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#80, (MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2)) AS profit#81] + +(47) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#85)] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(49) Filter [codegen id : 13] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] + +(51) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#96)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 14] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] + +(53) BroadcastExchange +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [plan_id=6] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(56) Filter +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) + +(57) Project +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#107] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#107] +Join type: Inner +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91] +Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, d_date_sk#107] + +(64) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#108, web_site_id#109] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#108, web_site_id#109] +Condition : isnotnull(web_site_sk#108) + +(67) BroadcastExchange +Input [2]: [web_site_sk#108, web_site_id#109] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#108] +Join type: Inner +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Input [7]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#108, web_site_id#109] + +(70) HashAggregate [codegen id : 18] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Keys [1]: [web_site_id#109] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] + +(71) Exchange +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#109, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#109] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [web channel AS channel#122, concat(web_site, web_site_id#109) AS id#123, MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#124, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#125, (MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) - MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2)) AS profit#126] + +(73) Union + +(74) HashAggregate [codegen id : 20] +Input [5]: [channel#37, id#38, sales#39, returns#40, profit#41] +Keys [2]: [channel#37, id#38] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132] +Results [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] + +(75) Exchange +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Arguments: hashpartitioning(channel#37, id#38, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(76) HashAggregate [codegen id : 21] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [5]: [channel#37, id#38, cast(sum(sales#39)#139 as decimal(37,2)) AS sales#142, cast(sum(returns#40)#140 as decimal(37,2)) AS returns#143, cast(sum(profit#41)#141 as decimal(38,2)) AS profit#144] + +(77) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] + +(78) HashAggregate [codegen id : 42] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [4]: [channel#37, sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] + +(79) HashAggregate [codegen id : 42] +Input [4]: [channel#37, sales#145, returns#146, profit#147] +Keys [1]: [channel#37] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#148, isEmpty#149, sum#150, isEmpty#151, sum#152, isEmpty#153] +Results [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] + +(80) Exchange +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Arguments: hashpartitioning(channel#37, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(81) HashAggregate [codegen id : 43] +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Keys [1]: [channel#37] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#160, sum(returns#146)#161, sum(profit#147)#162] +Results [5]: [channel#37, null AS id#163, sum(sales#145)#160 AS sum(sales)#164, sum(returns#146)#161 AS sum(returns)#165, sum(profit#147)#162 AS sum(profit)#166] + +(82) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] + +(83) HashAggregate [codegen id : 64] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [3]: [sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] + +(84) HashAggregate [codegen id : 64] +Input [3]: [sales#145, returns#146, profit#147] +Keys: [] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +Results [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] + +(85) Exchange +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] + +(86) HashAggregate [codegen id : 65] +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Keys: [] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#179, sum(returns#146)#180, sum(profit#147)#181] +Results [5]: [null AS channel#182, null AS id#183, sum(sales#145)#179 AS sum(sales)#184, sum(returns#146)#180 AS sum(returns)#185, sum(profit#147)#181 AS sum(profit)#186] + +(87) Union + +(88) HashAggregate [codegen id : 66] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] + +(89) Exchange +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: hashpartitioning(channel#37, id#38, sales#142, returns#143, profit#144, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(90) HashAggregate [codegen id : 67] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] + +(91) TakeOrderedAndProject +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: 100, [channel#37 ASC NULLS FIRST, id#38 ASC NULLS FIRST], [channel#37, id#38, sales#142, returns#143, profit#144] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a67882dfb4 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.native_iceberg_compat/simplified.txt @@ -0,0 +1,148 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (67) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (66) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (21) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (20) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [s_store_id] #3 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #6 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_catalog_page_sk,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #8 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (14) + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (43) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #11 + WholeStageCodegen (42) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (65) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #12 + WholeStageCodegen (64) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/explain.txt new file mode 100644 index 0000000000..eaed1f54a0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- BroadcastExchange (33) + +- * Filter (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet spark_catalog.default.item (26) + + +(1) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(12) Filter [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(13) BroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_month_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(20) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#5] +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] + +(23) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_category), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(26) Scan parquet spark_catalog.default.item +Output [2]: [i_current_price#15, i_category#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [i_category#16, sum#19, count#20] + +(30) Exchange +Input [3]: [i_category#16, sum#19, count#20] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#16, sum#19, count#20] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#15))#21] +Results [2]: [cast((avg(UnscaledValue(i_current_price#15))#21 / 100.0) as decimal(11,6)) AS avg(i_current_price)#22, i_category#16] + +(32) Filter [codegen id : 5] +Input [2]: [avg(i_current_price)#22, i_category#16] +Condition : isnotnull(avg(i_current_price)#22) + +(33) BroadcastExchange +Input [2]: [avg(i_current_price)#22, i_category#16] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#14] +Right keys [1]: [i_category#16] +Join type: Inner +Join condition: (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#22)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#12] +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#22, i_category#16] + +(36) BroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#12] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [2]: [ca_state#2, count#24] + +(40) Exchange +Input [2]: [ca_state#2, count#24] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(41) HashAggregate [codegen id : 8] +Input [2]: [ca_state#2, count#24] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [3]: [ca_state#2 AS state#26, count(1)#25 AS cnt#27, ca_state#2] + +(42) Filter [codegen id : 8] +Input [3]: [state#26, cnt#27, ca_state#2] +Condition : (cnt#27 >= 10) + +(43) TakeOrderedAndProject +Input [3]: [state#26, cnt#27, ca_state#2] +Arguments: 100, [cnt#27 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#26, cnt#27] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* HashAggregate (50) ++- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet spark_catalog.default.date_dim (44) + + +(44) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#28, d_year#29, d_moy#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(46) Filter [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] +Condition : (((isnotnull(d_year#29) AND isnotnull(d_moy#30)) AND (d_year#29 = 2000)) AND (d_moy#30 = 1)) + +(47) Project [codegen id : 1] +Output [1]: [d_month_seq#28] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(48) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + +(49) Exchange +Input [1]: [d_month_seq#28] +Arguments: hashpartitioning(d_month_seq#28, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/simplified.txt new file mode 100644 index 0000000000..9a2eddad66 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_datafusion/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [cnt,ca_state,state] + WholeStageCodegen (8) + Filter [cnt] + HashAggregate [ca_state,count] [count(1),state,cnt,count] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] + Project [ca_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + BroadcastHashJoin [i_category,i_category,i_current_price,avg(i_current_price)] + Filter [i_current_price,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [avg(i_current_price)] + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),sum,count] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..eaed1f54a0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/explain.txt @@ -0,0 +1,302 @@ +== Physical Plan == +TakeOrderedAndProject (43) ++- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.customer (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet spark_catalog.default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet spark_catalog.default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet spark_catalog.default.item (23) + +- BroadcastExchange (33) + +- * Filter (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet spark_catalog.default.item (26) + + +(1) Scan parquet spark_catalog.default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet spark_catalog.default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(12) Filter [codegen id : 2] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) + +(13) BroadcastExchange +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#6] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] + +(16) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_month_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#8, d_month_seq#9] +Condition : ((isnotnull(d_month_seq#9) AND (d_month_seq#9 = Subquery scalar-subquery#10, [id=#11])) AND isnotnull(d_date_sk#8)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_month_seq#9] + +(20) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#5] +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#8] + +(23) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_category), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Condition : ((isnotnull(i_current_price#13) AND isnotnull(i_category#14)) AND isnotnull(i_item_sk#12)) + +(26) Scan parquet spark_catalog.default.item +Output [2]: [i_current_price#15, i_category#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Condition : isnotnull(i_category#16) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#15, i_category#16] +Keys [1]: [i_category#16] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [i_category#16, sum#19, count#20] + +(30) Exchange +Input [3]: [i_category#16, sum#19, count#20] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#16, sum#19, count#20] +Keys [1]: [i_category#16] +Functions [1]: [avg(UnscaledValue(i_current_price#15))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#15))#21] +Results [2]: [cast((avg(UnscaledValue(i_current_price#15))#21 / 100.0) as decimal(11,6)) AS avg(i_current_price)#22, i_category#16] + +(32) Filter [codegen id : 5] +Input [2]: [avg(i_current_price)#22, i_category#16] +Condition : isnotnull(avg(i_current_price)#22) + +(33) BroadcastExchange +Input [2]: [avg(i_current_price)#22, i_category#16] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#14] +Right keys [1]: [i_category#16] +Join type: Inner +Join condition: (cast(i_current_price#13 as decimal(14,7)) > (1.2 * avg(i_current_price)#22)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#12] +Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#22, i_category#16] + +(36) BroadcastExchange +Input [1]: [i_item_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#12] +Join type: Inner +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#12] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#23] +Results [2]: [ca_state#2, count#24] + +(40) Exchange +Input [2]: [ca_state#2, count#24] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(41) HashAggregate [codegen id : 8] +Input [2]: [ca_state#2, count#24] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#25] +Results [3]: [ca_state#2 AS state#26, count(1)#25 AS cnt#27, ca_state#2] + +(42) Filter [codegen id : 8] +Input [3]: [state#26, cnt#27, ca_state#2] +Condition : (cnt#27 >= 10) + +(43) TakeOrderedAndProject +Input [3]: [state#26, cnt#27, ca_state#2] +Arguments: 100, [cnt#27 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#26, cnt#27] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* HashAggregate (50) ++- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * Filter (46) + +- * ColumnarToRow (45) + +- Scan parquet spark_catalog.default.date_dim (44) + + +(44) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_month_seq#28, d_year#29, d_moy#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(46) Filter [codegen id : 1] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] +Condition : (((isnotnull(d_year#29) AND isnotnull(d_moy#30)) AND (d_year#29 = 2000)) AND (d_moy#30 = 1)) + +(47) Project [codegen id : 1] +Output [1]: [d_month_seq#28] +Input [3]: [d_month_seq#28, d_year#29, d_moy#30] + +(48) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + +(49) Exchange +Input [1]: [d_month_seq#28] +Arguments: hashpartitioning(d_month_seq#28, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#28] +Keys [1]: [d_month_seq#28] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#28] + + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..9a2eddad66 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.native_iceberg_compat/simplified.txt @@ -0,0 +1,76 @@ +TakeOrderedAndProject [cnt,ca_state,state] + WholeStageCodegen (8) + Filter [cnt] + HashAggregate [ca_state,count] [count(1),state,cnt,count] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] + Project [ca_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + BroadcastHashJoin [i_category,i_category,i_current_price,avg(i_current_price)] + Filter [i_current_price,i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Filter [avg(i_current_price)] + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),sum,count] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_current_price,i_category] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/explain.txt new file mode 100644 index 0000000000..b557c1012c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/explain.txt @@ -0,0 +1,999 @@ +== Physical Plan == +* Sort (179) ++- Exchange (178) + +- * Project (177) + +- * SortMergeJoin Inner (176) + :- * Sort (114) + : +- Exchange (113) + : +- * HashAggregate (112) + : +- * HashAggregate (111) + : +- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Project (103) + : : +- * BroadcastHashJoin Inner BuildRight (102) + : : :- * Project (100) + : : : +- * BroadcastHashJoin Inner BuildRight (99) + : : : :- * Project (94) + : : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : : :- * Project (91) + : : : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : : : :- * Project (85) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : : : : :- * Project (82) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : : : : : :- * Project (76) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : : : : : : :- * Project (70) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (69) + : : : : : : : : : :- * Project (67) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : : : : : : : : :- * Project (61) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : : : : : : : : :- * Project (58) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : : : : : : : : : :- * Project (52) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (51) + : : : : : : : : : : : : : :- * Project (46) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (45) + : : : : : : : : : : : : : : :- * Project (40) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : : : : : : : : : : : : : :- * Project (34) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (33) + : : : : : : : : : : : : : : : : :- * Sort (12) + : : : : : : : : : : : : : : : : : +- Exchange (11) + : : : : : : : : : : : : : : : : : +- * Project (10) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : : : : : : : : : : : :- BroadcastExchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Project (8) + : : : : : : : : : : : : : : : : : +- * Filter (7) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (6) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : : : : : : : : : : : : : : : +- * Sort (32) + : : : : : : : : : : : : : : : : +- * Project (31) + : : : : : : : : : : : : : : : : +- * Filter (30) + : : : : : : : : : : : : : : : : +- * HashAggregate (29) + : : : : : : : : : : : : : : : : +- Exchange (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- * Project (26) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (25) + : : : : : : : : : : : : : : : : :- * Sort (18) + : : : : : : : : : : : : : : : : : +- Exchange (17) + : : : : : : : : : : : : : : : : : +- * Project (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (24) + : : : : : : : : : : : : : : : : +- Exchange (23) + : : : : : : : : : : : : : : : : +- * Project (22) + : : : : : : : : : : : : : : : : +- * Filter (21) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (20) + : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : : : : : : : : : : : : : : : +- BroadcastExchange (38) + : : : : : : : : : : : : : : : +- * Filter (37) + : : : : : : : : : : : : : : : +- * ColumnarToRow (36) + : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (35) + : : : : : : : : : : : : : : +- BroadcastExchange (44) + : : : : : : : : : : : : : : +- * Filter (43) + : : : : : : : : : : : : : : +- * ColumnarToRow (42) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store (41) + : : : : : : : : : : : : : +- BroadcastExchange (50) + : : : : : : : : : : : : : +- * Filter (49) + : : : : : : : : : : : : : +- * ColumnarToRow (48) + : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.customer (47) + : : : : : : : : : : : : +- BroadcastExchange (56) + : : : : : : : : : : : : +- * Filter (55) + : : : : : : : : : : : : +- * ColumnarToRow (54) + : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (53) + : : : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : : : +- BroadcastExchange (65) + : : : : : : : : : : +- * Filter (64) + : : : : : : : : : : +- * ColumnarToRow (63) + : : : : : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (62) + : : : : : : : : : +- ReusedExchange (68) + : : : : : : : : +- BroadcastExchange (74) + : : : : : : : : +- * Filter (73) + : : : : : : : : +- * ColumnarToRow (72) + : : : : : : : : +- Scan parquet spark_catalog.default.promotion (71) + : : : : : : : +- BroadcastExchange (80) + : : : : : : : +- * Filter (79) + : : : : : : : +- * ColumnarToRow (78) + : : : : : : : +- Scan parquet spark_catalog.default.household_demographics (77) + : : : : : : +- ReusedExchange (83) + : : : : : +- BroadcastExchange (89) + : : : : : +- * Filter (88) + : : : : : +- * ColumnarToRow (87) + : : : : : +- Scan parquet spark_catalog.default.customer_address (86) + : : : : +- ReusedExchange (92) + : : : +- BroadcastExchange (98) + : : : +- * Filter (97) + : : : +- * ColumnarToRow (96) + : : : +- Scan parquet spark_catalog.default.income_band (95) + : : +- ReusedExchange (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet spark_catalog.default.item (104) + +- * Sort (175) + +- Exchange (174) + +- * HashAggregate (173) + +- * HashAggregate (172) + +- * Project (171) + +- * BroadcastHashJoin Inner BuildRight (170) + :- * Project (168) + : +- * BroadcastHashJoin Inner BuildRight (167) + : :- * Project (165) + : : +- * BroadcastHashJoin Inner BuildRight (164) + : : :- * Project (162) + : : : +- * BroadcastHashJoin Inner BuildRight (161) + : : : :- * Project (159) + : : : : +- * BroadcastHashJoin Inner BuildRight (158) + : : : : :- * Project (156) + : : : : : +- * BroadcastHashJoin Inner BuildRight (155) + : : : : : :- * Project (153) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : : : : :- * Project (150) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : : : : :- * Project (147) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : : : : :- * Project (144) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : : : : :- * Project (141) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : : : : :- * Project (138) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : : : : :- * Project (135) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : : : : :- * Project (132) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (122) + : : : : : : : : : : : : : : : :- * Sort (116) + : : : : : : : : : : : : : : : : +- ReusedExchange (115) + : : : : : : : : : : : : : : : +- * Sort (121) + : : : : : : : : : : : : : : : +- * Project (120) + : : : : : : : : : : : : : : : +- * Filter (119) + : : : : : : : : : : : : : : : +- * HashAggregate (118) + : : : : : : : : : : : : : : : +- ReusedExchange (117) + : : : : : : : : : : : : : : +- BroadcastExchange (127) + : : : : : : : : : : : : : : +- * Filter (126) + : : : : : : : : : : : : : : +- * ColumnarToRow (125) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (124) + : : : : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : : : : +- ReusedExchange (136) + : : : : : : : : : : +- ReusedExchange (139) + : : : : : : : : : +- ReusedExchange (142) + : : : : : : : : +- ReusedExchange (145) + : : : : : : : +- ReusedExchange (148) + : : : : : : +- ReusedExchange (151) + : : : : : +- ReusedExchange (154) + : : : : +- ReusedExchange (157) + : : : +- ReusedExchange (160) + : : +- ReusedExchange (163) + : +- ReusedExchange (166) + +- ReusedExchange (169) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(3) Filter [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(4) BroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [plan_id=1] + +(5) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(6) ColumnarToRow +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(7) Filter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(8) Project +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#8] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] + +(11) Exchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 3] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(15) Filter [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(16) Project [codegen id : 4] +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(17) Exchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 5] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(21) Filter [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(22) Project [codegen id : 6] +Output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(23) Exchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 7] +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 8] +Left keys [2]: [cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#20, cr_order_number#21] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 8] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(27) HashAggregate [codegen id : 8] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [3]: [sum#26, sum#27, isEmpty#28] +Results [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] + +(28) Exchange +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) HashAggregate [codegen id : 9] +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sale#34, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33 AS refund#35] + +(30) Filter [codegen id : 9] +Input [3]: [cs_item_sk#16, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(31) Project [codegen id : 9] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sale#34, refund#35] + +(32) Sort [codegen id : 9] +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [cs_item_sk#16] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] + +(35) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#36, d_year#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] + +(37) Filter [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] +Condition : ((isnotnull(d_year#37) AND (d_year#37 = 1999)) AND isnotnull(d_date_sk#36)) + +(38) BroadcastExchange +Input [2]: [d_date_sk#36, d_year#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#36, d_year#37] + +(41) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] + +(43) Filter [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Condition : ((isnotnull(s_store_sk#38) AND isnotnull(s_store_name#39)) AND isnotnull(s_zip#40)) + +(44) BroadcastExchange +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(45) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 25] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_sk#38, s_store_name#39, s_zip#40] + +(47) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(49) Filter [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Condition : (((((isnotnull(c_customer_sk#41) AND isnotnull(c_first_sales_date_sk#46)) AND isnotnull(c_first_shipto_date_sk#45)) AND isnotnull(c_current_cdemo_sk#42)) AND isnotnull(c_current_hdemo_sk#43)) AND isnotnull(c_current_addr_sk#44)) + +(50) BroadcastExchange +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(51) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#41] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(53) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#47, d_year#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] + +(55) Filter [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] +Condition : isnotnull(d_date_sk#47) + +(56) BroadcastExchange +Input [2]: [d_date_sk#47, d_year#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_sales_date_sk#46] +Right keys [1]: [d_date_sk#47] +Join type: Inner +Join condition: None + +(58) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46, d_date_sk#47, d_year#48] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#49, d_year#50] + +(60) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_shipto_date_sk#45] +Right keys [1]: [d_date_sk#49] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48, d_date_sk#49, d_year#50] + +(62) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#51, cd_marital_status#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] + +(64) Filter [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Condition : (isnotnull(cd_demo_sk#51) AND isnotnull(cd_marital_status#52)) + +(65) BroadcastExchange +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(66) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#51] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_demo_sk#51, cd_marital_status#52] + +(68) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#53, cd_marital_status#54] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_cdemo_sk#42] +Right keys [1]: [cd_demo_sk#53] +Join type: Inner +Join condition: NOT (cd_marital_status#52 = cd_marital_status#54) + +(70) Project [codegen id : 25] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52, cd_demo_sk#53, cd_marital_status#54] + +(71) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 17] +Input [1]: [p_promo_sk#55] + +(73) Filter [codegen id : 17] +Input [1]: [p_promo_sk#55] +Condition : isnotnull(p_promo_sk#55) + +(74) BroadcastExchange +Input [1]: [p_promo_sk#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=11] + +(75) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_promo_sk#7] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(76) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, p_promo_sk#55] + +(77) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(78) ColumnarToRow [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] + +(79) Filter [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Condition : (isnotnull(hd_demo_sk#56) AND isnotnull(hd_income_band_sk#57)) + +(80) BroadcastExchange +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(81) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#56] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_demo_sk#56, hd_income_band_sk#57] + +(83) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#58, hd_income_band_sk#59] + +(84) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_hdemo_sk#43] +Right keys [1]: [hd_demo_sk#58] +Join type: Inner +Join condition: None + +(85) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_demo_sk#58, hd_income_band_sk#59] + +(86) Scan parquet spark_catalog.default.customer_address +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(88) Filter [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Condition : isnotnull(ca_address_sk#60) + +(89) BroadcastExchange +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] + +(90) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_addr_sk#5] +Right keys [1]: [ca_address_sk#60] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(92) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(93) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_addr_sk#44] +Right keys [1]: [ca_address_sk#65] +Join type: Inner +Join condition: None + +(94) Project [codegen id : 25] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(95) Scan parquet spark_catalog.default.income_band +Output [1]: [ib_income_band_sk#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 22] +Input [1]: [ib_income_band_sk#70] + +(97) Filter [codegen id : 22] +Input [1]: [ib_income_band_sk#70] +Condition : isnotnull(ib_income_band_sk#70) + +(98) BroadcastExchange +Input [1]: [ib_income_band_sk#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(99) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#57] +Right keys [1]: [ib_income_band_sk#70] +Join type: Inner +Join condition: None + +(100) Project [codegen id : 25] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#70] + +(101) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#71] + +(102) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#59] +Right keys [1]: [ib_income_band_sk#71] +Join type: Inner +Join condition: None + +(103) Project [codegen id : 25] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#71] + +(104) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(106) Filter [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Condition : ((((((isnotnull(i_current_price#73) AND i_color#74 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#73 >= 64.00)) AND (i_current_price#73 <= 74.00)) AND (i_current_price#73 >= 65.00)) AND (i_current_price#73 <= 79.00)) AND isnotnull(i_item_sk#72)) + +(107) Project [codegen id : 24] +Output [2]: [i_item_sk#72, i_product_name#75] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(108) BroadcastExchange +Input [2]: [i_item_sk#72, i_product_name#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] + +(109) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(110) Project [codegen id : 25] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] + +(111) HashAggregate [codegen id : 25] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count#76, sum#77, sum#78, sum#79] +Results [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] + +(112) HashAggregate [codegen id : 25] +Input [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#9))#85, sum(UnscaledValue(ss_list_price#10))#86, sum(UnscaledValue(ss_coupon_amt#11))#87] +Results [17]: [i_product_name#75 AS product_name#88, i_item_sk#72 AS item_sk#89, s_store_name#39 AS store_name#90, s_zip#40 AS store_zip#91, ca_street_number#61 AS b_street_number#92, ca_street_name#62 AS b_streen_name#93, ca_city#63 AS b_city#94, ca_zip#64 AS b_zip#95, ca_street_number#66 AS c_street_number#96, ca_street_name#67 AS c_street_name#97, ca_city#68 AS c_city#98, ca_zip#69 AS c_zip#99, d_year#37 AS syear#100, count(1)#84 AS cnt#101, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#85,17,2) AS s1#102, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#86,17,2) AS s2#103, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#87,17,2) AS s3#104] + +(113) Exchange +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: hashpartitioning(item_sk#89, store_name#90, store_zip#91, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(114) Sort [codegen id : 26] +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: [item_sk#89 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, store_zip#91 ASC NULLS FIRST], false, 0 + +(115) ReusedExchange [Reuses operator id: 11] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] + +(116) Sort [codegen id : 29] +Input [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Arguments: [ss_item_sk#105 ASC NULLS FIRST], false, 0 + +(117) ReusedExchange [Reuses operator id: 28] +Output [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] + +(118) HashAggregate [codegen id : 35] +Input [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] +Keys [1]: [cs_item_sk#116] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#120)), sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#120))#32, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33] +Results [3]: [cs_item_sk#116, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#120))#32,17,2) AS sale#34, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33 AS refund#35] + +(119) Filter [codegen id : 35] +Input [3]: [cs_item_sk#116, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(120) Project [codegen id : 35] +Output [1]: [cs_item_sk#116] +Input [3]: [cs_item_sk#116, sale#34, refund#35] + +(121) Sort [codegen id : 35] +Input [1]: [cs_item_sk#116] +Arguments: [cs_item_sk#116 ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [cs_item_sk#116] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Input [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, cs_item_sk#116] + +(124) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#124, d_year#125] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] + +(126) Filter [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] +Condition : ((isnotnull(d_year#125) AND (d_year#125 = 2000)) AND isnotnull(d_date_sk#124)) + +(127) BroadcastExchange +Input [2]: [d_date_sk#124, d_year#125] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=17] + +(128) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#124] +Join type: Inner +Join condition: None + +(129) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125] +Input [13]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, d_date_sk#124, d_year#125] + +(130) ReusedExchange [Reuses operator id: 44] +Output [3]: [s_store_sk#126, s_store_name#127, s_zip#128] + +(131) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_store_sk#110] +Right keys [1]: [s_store_sk#126] +Join type: Inner +Join condition: None + +(132) Project [codegen id : 51] +Output [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128] +Input [14]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_sk#126, s_store_name#127, s_zip#128] + +(133) ReusedExchange [Reuses operator id: 50] +Output [6]: [c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(134) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_customer_sk#106] +Right keys [1]: [c_customer_sk#129] +Join type: Inner +Join condition: None + +(135) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] +Input [18]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(136) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#135, d_year#136] + +(137) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_sales_date_sk#134] +Right keys [1]: [d_date_sk#135] +Join type: Inner +Join condition: None + +(138) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134, d_date_sk#135, d_year#136] + +(139) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#137, d_year#138] + +(140) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_shipto_date_sk#133] +Right keys [1]: [d_date_sk#137] +Join type: Inner +Join condition: None + +(141) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136, d_date_sk#137, d_year#138] + +(142) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#139, cd_marital_status#140] + +(143) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_cdemo_sk#107] +Right keys [1]: [cd_demo_sk#139] +Join type: Inner +Join condition: None + +(144) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_demo_sk#139, cd_marital_status#140] + +(145) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#141, cd_marital_status#142] + +(146) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_cdemo_sk#130] +Right keys [1]: [cd_demo_sk#141] +Join type: Inner +Join condition: NOT (cd_marital_status#140 = cd_marital_status#142) + +(147) Project [codegen id : 51] +Output [14]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140, cd_demo_sk#141, cd_marital_status#142] + +(148) ReusedExchange [Reuses operator id: 74] +Output [1]: [p_promo_sk#143] + +(149) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_promo_sk#111] +Right keys [1]: [p_promo_sk#143] +Join type: Inner +Join condition: None + +(150) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, p_promo_sk#143] + +(151) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#144, hd_income_band_sk#145] + +(152) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_hdemo_sk#108] +Right keys [1]: [hd_demo_sk#144] +Join type: Inner +Join condition: None + +(153) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_demo_sk#144, hd_income_band_sk#145] + +(154) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#146, hd_income_band_sk#147] + +(155) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_hdemo_sk#131] +Right keys [1]: [hd_demo_sk#146] +Join type: Inner +Join condition: None + +(156) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147] +Input [15]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_demo_sk#146, hd_income_band_sk#147] + +(157) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(158) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_addr_sk#109] +Right keys [1]: [ca_address_sk#148] +Join type: Inner +Join condition: None + +(159) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] +Input [18]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(160) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(161) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_addr_sk#132] +Right keys [1]: [ca_address_sk#153] +Join type: Inner +Join condition: None + +(162) Project [codegen id : 51] +Output [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [21]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(163) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#158] + +(164) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#145] +Right keys [1]: [ib_income_band_sk#158] +Join type: Inner +Join condition: None + +(165) Project [codegen id : 51] +Output [18]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [20]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#158] + +(166) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#159] + +(167) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#147] +Right keys [1]: [ib_income_band_sk#159] +Join type: Inner +Join condition: None + +(168) Project [codegen id : 51] +Output [17]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#159] + +(169) ReusedExchange [Reuses operator id: 108] +Output [2]: [i_item_sk#160, i_product_name#161] + +(170) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [i_item_sk#160] +Join type: Inner +Join condition: None + +(171) Project [codegen id : 51] +Output [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] + +(172) HashAggregate [codegen id : 51] +Input [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#112)), partial_sum(UnscaledValue(ss_list_price#113)), partial_sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count#76, sum#162, sum#163, sum#164] +Results [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] + +(173) HashAggregate [codegen id : 51] +Input [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#112)), sum(UnscaledValue(ss_list_price#113)), sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#112))#85, sum(UnscaledValue(ss_list_price#113))#86, sum(UnscaledValue(ss_coupon_amt#114))#87] +Results [8]: [i_item_sk#160 AS item_sk#168, s_store_name#127 AS store_name#169, s_zip#128 AS store_zip#170, d_year#125 AS syear#171, count(1)#84 AS cnt#172, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#112))#85,17,2) AS s1#173, MakeDecimal(sum(UnscaledValue(ss_list_price#113))#86,17,2) AS s2#174, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#114))#87,17,2) AS s3#175] + +(174) Exchange +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: hashpartitioning(item_sk#168, store_name#169, store_zip#170, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(175) Sort [codegen id : 52] +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: [item_sk#168 ASC NULLS FIRST, store_name#169 ASC NULLS FIRST, store_zip#170 ASC NULLS FIRST], false, 0 + +(176) SortMergeJoin [codegen id : 53] +Left keys [3]: [item_sk#89, store_name#90, store_zip#91] +Right keys [3]: [item_sk#168, store_name#169, store_zip#170] +Join type: Inner +Join condition: (cnt#172 <= cnt#101) + +(177) Project [codegen id : 53] +Output [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Input [25]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] + +(178) Exchange +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: rangepartitioning(product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST, s1#102 ASC NULLS FIRST, s1#173 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(179) Sort [codegen id : 54] +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: [product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST, s1#102 ASC NULLS FIRST, s1#173 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/simplified.txt new file mode 100644 index 0000000000..c010e0aea0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_datafusion/simplified.txt @@ -0,0 +1,270 @@ +WholeStageCodegen (54) + Sort [product_name,store_name,cnt,s1,s1] + InputAdapter + Exchange [product_name,store_name,cnt,s1,s1] #1 + WholeStageCodegen (53) + Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + SortMergeJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] + InputAdapter + WholeStageCodegen (26) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (25) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (2) + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (8) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #6 + WholeStageCodegen (4) + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #7 + WholeStageCodegen (6) + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + Filter [s_store_sk,s_store_name,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (12) + Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (13) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (18) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (20) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (22) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (24) + Project [i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + WholeStageCodegen (52) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #18 + WholeStageCodegen (51) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + InputAdapter + WholeStageCodegen (35) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (36) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [p_promo_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..b557c1012c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/explain.txt @@ -0,0 +1,999 @@ +== Physical Plan == +* Sort (179) ++- Exchange (178) + +- * Project (177) + +- * SortMergeJoin Inner (176) + :- * Sort (114) + : +- Exchange (113) + : +- * HashAggregate (112) + : +- * HashAggregate (111) + : +- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Project (103) + : : +- * BroadcastHashJoin Inner BuildRight (102) + : : :- * Project (100) + : : : +- * BroadcastHashJoin Inner BuildRight (99) + : : : :- * Project (94) + : : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : : :- * Project (91) + : : : : : +- * BroadcastHashJoin Inner BuildRight (90) + : : : : : :- * Project (85) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : : : : :- * Project (82) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (81) + : : : : : : : :- * Project (76) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : : : : : : :- * Project (70) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (69) + : : : : : : : : : :- * Project (67) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (66) + : : : : : : : : : : :- * Project (61) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : : : : : : : : :- * Project (58) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : : : : : : : : : :- * Project (52) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (51) + : : : : : : : : : : : : : :- * Project (46) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (45) + : : : : : : : : : : : : : : :- * Project (40) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : : : : : : : : : : : : : :- * Project (34) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (33) + : : : : : : : : : : : : : : : : :- * Sort (12) + : : : : : : : : : : : : : : : : : +- Exchange (11) + : : : : : : : : : : : : : : : : : +- * Project (10) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : : : : : : : : : : : : : : : :- BroadcastExchange (4) + : : : : : : : : : : : : : : : : : : +- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : : : : : : : : : : : : +- * Project (8) + : : : : : : : : : : : : : : : : : +- * Filter (7) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (6) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store_returns (5) + : : : : : : : : : : : : : : : : +- * Sort (32) + : : : : : : : : : : : : : : : : +- * Project (31) + : : : : : : : : : : : : : : : : +- * Filter (30) + : : : : : : : : : : : : : : : : +- * HashAggregate (29) + : : : : : : : : : : : : : : : : +- Exchange (28) + : : : : : : : : : : : : : : : : +- * HashAggregate (27) + : : : : : : : : : : : : : : : : +- * Project (26) + : : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (25) + : : : : : : : : : : : : : : : : :- * Sort (18) + : : : : : : : : : : : : : : : : : +- Exchange (17) + : : : : : : : : : : : : : : : : : +- * Project (16) + : : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (13) + : : : : : : : : : : : : : : : : +- * Sort (24) + : : : : : : : : : : : : : : : : +- Exchange (23) + : : : : : : : : : : : : : : : : +- * Project (22) + : : : : : : : : : : : : : : : : +- * Filter (21) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (20) + : : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : : : : : : : : : : : : : : : +- BroadcastExchange (38) + : : : : : : : : : : : : : : : +- * Filter (37) + : : : : : : : : : : : : : : : +- * ColumnarToRow (36) + : : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (35) + : : : : : : : : : : : : : : +- BroadcastExchange (44) + : : : : : : : : : : : : : : +- * Filter (43) + : : : : : : : : : : : : : : +- * ColumnarToRow (42) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.store (41) + : : : : : : : : : : : : : +- BroadcastExchange (50) + : : : : : : : : : : : : : +- * Filter (49) + : : : : : : : : : : : : : +- * ColumnarToRow (48) + : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.customer (47) + : : : : : : : : : : : : +- BroadcastExchange (56) + : : : : : : : : : : : : +- * Filter (55) + : : : : : : : : : : : : +- * ColumnarToRow (54) + : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (53) + : : : : : : : : : : : +- ReusedExchange (59) + : : : : : : : : : : +- BroadcastExchange (65) + : : : : : : : : : : +- * Filter (64) + : : : : : : : : : : +- * ColumnarToRow (63) + : : : : : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (62) + : : : : : : : : : +- ReusedExchange (68) + : : : : : : : : +- BroadcastExchange (74) + : : : : : : : : +- * Filter (73) + : : : : : : : : +- * ColumnarToRow (72) + : : : : : : : : +- Scan parquet spark_catalog.default.promotion (71) + : : : : : : : +- BroadcastExchange (80) + : : : : : : : +- * Filter (79) + : : : : : : : +- * ColumnarToRow (78) + : : : : : : : +- Scan parquet spark_catalog.default.household_demographics (77) + : : : : : : +- ReusedExchange (83) + : : : : : +- BroadcastExchange (89) + : : : : : +- * Filter (88) + : : : : : +- * ColumnarToRow (87) + : : : : : +- Scan parquet spark_catalog.default.customer_address (86) + : : : : +- ReusedExchange (92) + : : : +- BroadcastExchange (98) + : : : +- * Filter (97) + : : : +- * ColumnarToRow (96) + : : : +- Scan parquet spark_catalog.default.income_band (95) + : : +- ReusedExchange (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet spark_catalog.default.item (104) + +- * Sort (175) + +- Exchange (174) + +- * HashAggregate (173) + +- * HashAggregate (172) + +- * Project (171) + +- * BroadcastHashJoin Inner BuildRight (170) + :- * Project (168) + : +- * BroadcastHashJoin Inner BuildRight (167) + : :- * Project (165) + : : +- * BroadcastHashJoin Inner BuildRight (164) + : : :- * Project (162) + : : : +- * BroadcastHashJoin Inner BuildRight (161) + : : : :- * Project (159) + : : : : +- * BroadcastHashJoin Inner BuildRight (158) + : : : : :- * Project (156) + : : : : : +- * BroadcastHashJoin Inner BuildRight (155) + : : : : : :- * Project (153) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : : : : :- * Project (150) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : : : : :- * Project (147) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : : : : :- * Project (144) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : : : : :- * Project (141) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : : : : :- * Project (138) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : : : : :- * Project (135) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : : : : :- * Project (132) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : : : +- * SortMergeJoin Inner (122) + : : : : : : : : : : : : : : : :- * Sort (116) + : : : : : : : : : : : : : : : : +- ReusedExchange (115) + : : : : : : : : : : : : : : : +- * Sort (121) + : : : : : : : : : : : : : : : +- * Project (120) + : : : : : : : : : : : : : : : +- * Filter (119) + : : : : : : : : : : : : : : : +- * HashAggregate (118) + : : : : : : : : : : : : : : : +- ReusedExchange (117) + : : : : : : : : : : : : : : +- BroadcastExchange (127) + : : : : : : : : : : : : : : +- * Filter (126) + : : : : : : : : : : : : : : +- * ColumnarToRow (125) + : : : : : : : : : : : : : : +- Scan parquet spark_catalog.default.date_dim (124) + : : : : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : : : : +- ReusedExchange (136) + : : : : : : : : : : +- ReusedExchange (139) + : : : : : : : : : +- ReusedExchange (142) + : : : : : : : : +- ReusedExchange (145) + : : : : : : : +- ReusedExchange (148) + : : : : : : +- ReusedExchange (151) + : : : : : +- ReusedExchange (154) + : : : : +- ReusedExchange (157) + : : : +- ReusedExchange (160) + : : +- ReusedExchange (163) + : +- ReusedExchange (166) + +- ReusedExchange (169) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] + +(3) Filter [codegen id : 1] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AND isnotnull(ss_store_sk#6)) AND isnotnull(ss_customer_sk#2)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_promo_sk#7)) AND isnotnull(ss_hdemo_sk#4)) AND isnotnull(ss_addr_sk#5)) + +(4) BroadcastExchange +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [plan_id=1] + +(5) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(6) ColumnarToRow +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(7) Filter +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(8) Project +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Input [3]: [sr_item_sk#13, sr_ticket_number#14, sr_returned_date_sk#15] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#8] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 2] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#13, sr_ticket_number#14] + +(11) Exchange +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(12) Sort [codegen id : 3] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(13) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(15) Filter [codegen id : 4] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(16) Project [codegen id : 4] +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Input [4]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cs_sold_date_sk#19] + +(17) Exchange +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: hashpartitioning(cs_item_sk#16, cs_order_number#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 5] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Arguments: [cs_item_sk#16 ASC NULLS FIRST, cs_order_number#17 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(21) Filter [codegen id : 6] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] +Condition : (isnotnull(cr_item_sk#20) AND isnotnull(cr_order_number#21)) + +(22) Project [codegen id : 6] +Output [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [6]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24, cr_returned_date_sk#25] + +(23) Exchange +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: hashpartitioning(cr_item_sk#20, cr_order_number#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 7] +Input [5]: [cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Arguments: [cr_item_sk#20 ASC NULLS FIRST, cr_order_number#21 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 8] +Left keys [2]: [cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#20, cr_order_number#21] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 8] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#20, cr_order_number#21, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] + +(27) HashAggregate [codegen id : 8] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#22, cr_reversed_charge#23, cr_store_credit#24] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [3]: [sum#26, sum#27, isEmpty#28] +Results [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] + +(28) Exchange +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Arguments: hashpartitioning(cs_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) HashAggregate [codegen id : 9] +Input [4]: [cs_item_sk#16, sum#29, sum#30, isEmpty#31] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sale#34, sum(((cr_refunded_cash#22 + cr_reversed_charge#23) + cr_store_credit#24))#33 AS refund#35] + +(30) Filter [codegen id : 9] +Input [3]: [cs_item_sk#16, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(31) Project [codegen id : 9] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sale#34, refund#35] + +(32) Sort [codegen id : 9] +Input [1]: [cs_item_sk#16] +Arguments: [cs_item_sk#16 ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [cs_item_sk#16] +Join type: Inner +Join condition: None + +(34) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#16] + +(35) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#36, d_year#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] + +(37) Filter [codegen id : 10] +Input [2]: [d_date_sk#36, d_year#37] +Condition : ((isnotnull(d_year#37) AND (d_year#37 = 1999)) AND isnotnull(d_date_sk#36)) + +(38) BroadcastExchange +Input [2]: [d_date_sk#36, d_year#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#12] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 25] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#36, d_year#37] + +(41) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] + +(43) Filter [codegen id : 11] +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Condition : ((isnotnull(s_store_sk#38) AND isnotnull(s_store_name#39)) AND isnotnull(s_zip#40)) + +(44) BroadcastExchange +Input [3]: [s_store_sk#38, s_store_name#39, s_zip#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(45) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#38] +Join type: Inner +Join condition: None + +(46) Project [codegen id : 25] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_sk#38, s_store_name#39, s_zip#40] + +(47) Scan parquet spark_catalog.default.customer +Output [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(49) Filter [codegen id : 12] +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Condition : (((((isnotnull(c_customer_sk#41) AND isnotnull(c_first_sales_date_sk#46)) AND isnotnull(c_first_shipto_date_sk#45)) AND isnotnull(c_current_cdemo_sk#42)) AND isnotnull(c_current_hdemo_sk#43)) AND isnotnull(c_current_addr_sk#44)) + +(50) BroadcastExchange +Input [6]: [c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(51) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#41] +Join type: Inner +Join condition: None + +(52) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_customer_sk#41, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46] + +(53) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#47, d_year#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] + +(55) Filter [codegen id : 13] +Input [2]: [d_date_sk#47, d_year#48] +Condition : isnotnull(d_date_sk#47) + +(56) BroadcastExchange +Input [2]: [d_date_sk#47, d_year#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_sales_date_sk#46] +Right keys [1]: [d_date_sk#47] +Join type: Inner +Join condition: None + +(58) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, c_first_sales_date_sk#46, d_date_sk#47, d_year#48] + +(59) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#49, d_year#50] + +(60) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_first_shipto_date_sk#45] +Right keys [1]: [d_date_sk#49] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, c_first_shipto_date_sk#45, d_year#48, d_date_sk#49, d_year#50] + +(62) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#51, cd_marital_status#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] + +(64) Filter [codegen id : 15] +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Condition : (isnotnull(cd_demo_sk#51) AND isnotnull(cd_marital_status#52)) + +(65) BroadcastExchange +Input [2]: [cd_demo_sk#51, cd_marital_status#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(66) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#51] +Join type: Inner +Join condition: None + +(67) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_demo_sk#51, cd_marital_status#52] + +(68) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#53, cd_marital_status#54] + +(69) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_cdemo_sk#42] +Right keys [1]: [cd_demo_sk#53] +Join type: Inner +Join condition: NOT (cd_marital_status#52 = cd_marital_status#54) + +(70) Project [codegen id : 25] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_cdemo_sk#42, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, cd_marital_status#52, cd_demo_sk#53, cd_marital_status#54] + +(71) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#55] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 17] +Input [1]: [p_promo_sk#55] + +(73) Filter [codegen id : 17] +Input [1]: [p_promo_sk#55] +Condition : isnotnull(p_promo_sk#55) + +(74) BroadcastExchange +Input [1]: [p_promo_sk#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=11] + +(75) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_promo_sk#7] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(76) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, p_promo_sk#55] + +(77) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(78) ColumnarToRow [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] + +(79) Filter [codegen id : 18] +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Condition : (isnotnull(hd_demo_sk#56) AND isnotnull(hd_income_band_sk#57)) + +(80) BroadcastExchange +Input [2]: [hd_demo_sk#56, hd_income_band_sk#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(81) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#56] +Join type: Inner +Join condition: None + +(82) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_demo_sk#56, hd_income_band_sk#57] + +(83) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#58, hd_income_band_sk#59] + +(84) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_hdemo_sk#43] +Right keys [1]: [hd_demo_sk#58] +Join type: Inner +Join condition: None + +(85) Project [codegen id : 25] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_hdemo_sk#43, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_demo_sk#58, hd_income_band_sk#59] + +(86) Scan parquet spark_catalog.default.customer_address +Output [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(88) Filter [codegen id : 20] +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Condition : isnotnull(ca_address_sk#60) + +(89) BroadcastExchange +Input [5]: [ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] + +(90) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_addr_sk#5] +Right keys [1]: [ca_address_sk#60] +Join type: Inner +Join condition: None + +(91) Project [codegen id : 25] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_address_sk#60, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64] + +(92) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(93) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_current_addr_sk#44] +Right keys [1]: [ca_address_sk#65] +Join type: Inner +Join condition: None + +(94) Project [codegen id : 25] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, c_current_addr_sk#44, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_address_sk#65, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] + +(95) Scan parquet spark_catalog.default.income_band +Output [1]: [ib_income_band_sk#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 22] +Input [1]: [ib_income_band_sk#70] + +(97) Filter [codegen id : 22] +Input [1]: [ib_income_band_sk#70] +Condition : isnotnull(ib_income_band_sk#70) + +(98) BroadcastExchange +Input [1]: [ib_income_band_sk#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(99) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#57] +Right keys [1]: [ib_income_band_sk#70] +Join type: Inner +Join condition: None + +(100) Project [codegen id : 25] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#57, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#70] + +(101) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#71] + +(102) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [hd_income_band_sk#59] +Right keys [1]: [ib_income_band_sk#71] +Join type: Inner +Join condition: None + +(103) Project [codegen id : 25] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, hd_income_band_sk#59, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, ib_income_band_sk#71] + +(104) Scan parquet spark_catalog.default.item +Output [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(106) Filter [codegen id : 24] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] +Condition : ((((((isnotnull(i_current_price#73) AND i_color#74 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#73 >= 64.00)) AND (i_current_price#73 <= 74.00)) AND (i_current_price#73 >= 65.00)) AND (i_current_price#73 <= 79.00)) AND isnotnull(i_item_sk#72)) + +(107) Project [codegen id : 24] +Output [2]: [i_item_sk#72, i_product_name#75] +Input [4]: [i_item_sk#72, i_current_price#73, i_color#74, i_product_name#75] + +(108) BroadcastExchange +Input [2]: [i_item_sk#72, i_product_name#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] + +(109) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(110) Project [codegen id : 25] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, s_store_name#39, s_zip#40, d_year#48, d_year#50, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] + +(111) HashAggregate [codegen id : 25] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#37, d_year#48, d_year#50, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, i_item_sk#72, i_product_name#75] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count#76, sum#77, sum#78, sum#79] +Results [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] + +(112) HashAggregate [codegen id : 25] +Input [19]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50, count#80, sum#81, sum#82, sum#83] +Keys [15]: [i_product_name#75, i_item_sk#72, s_store_name#39, s_zip#40, ca_street_number#61, ca_street_name#62, ca_city#63, ca_zip#64, ca_street_number#66, ca_street_name#67, ca_city#68, ca_zip#69, d_year#37, d_year#48, d_year#50] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#9))#85, sum(UnscaledValue(ss_list_price#10))#86, sum(UnscaledValue(ss_coupon_amt#11))#87] +Results [17]: [i_product_name#75 AS product_name#88, i_item_sk#72 AS item_sk#89, s_store_name#39 AS store_name#90, s_zip#40 AS store_zip#91, ca_street_number#61 AS b_street_number#92, ca_street_name#62 AS b_streen_name#93, ca_city#63 AS b_city#94, ca_zip#64 AS b_zip#95, ca_street_number#66 AS c_street_number#96, ca_street_name#67 AS c_street_name#97, ca_city#68 AS c_city#98, ca_zip#69 AS c_zip#99, d_year#37 AS syear#100, count(1)#84 AS cnt#101, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#85,17,2) AS s1#102, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#86,17,2) AS s2#103, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#87,17,2) AS s3#104] + +(113) Exchange +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: hashpartitioning(item_sk#89, store_name#90, store_zip#91, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(114) Sort [codegen id : 26] +Input [17]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104] +Arguments: [item_sk#89 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, store_zip#91 ASC NULLS FIRST], false, 0 + +(115) ReusedExchange [Reuses operator id: 11] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] + +(116) Sort [codegen id : 29] +Input [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Arguments: [ss_item_sk#105 ASC NULLS FIRST], false, 0 + +(117) ReusedExchange [Reuses operator id: 28] +Output [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] + +(118) HashAggregate [codegen id : 35] +Input [4]: [cs_item_sk#116, sum#117, sum#118, isEmpty#119] +Keys [1]: [cs_item_sk#116] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#120)), sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#120))#32, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33] +Results [3]: [cs_item_sk#116, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#120))#32,17,2) AS sale#34, sum(((cr_refunded_cash#121 + cr_reversed_charge#122) + cr_store_credit#123))#33 AS refund#35] + +(119) Filter [codegen id : 35] +Input [3]: [cs_item_sk#116, sale#34, refund#35] +Condition : ((isnotnull(sale#34) AND isnotnull(refund#35)) AND (cast(sale#34 as decimal(21,2)) > (2 * refund#35))) + +(120) Project [codegen id : 35] +Output [1]: [cs_item_sk#116] +Input [3]: [cs_item_sk#116, sale#34, refund#35] + +(121) Sort [codegen id : 35] +Input [1]: [cs_item_sk#116] +Arguments: [cs_item_sk#116 ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [cs_item_sk#116] +Join type: Inner +Join condition: None + +(123) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115] +Input [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, cs_item_sk#116] + +(124) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#124, d_year#125] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(125) ColumnarToRow [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] + +(126) Filter [codegen id : 36] +Input [2]: [d_date_sk#124, d_year#125] +Condition : ((isnotnull(d_year#125) AND (d_year#125 = 2000)) AND isnotnull(d_date_sk#124)) + +(127) BroadcastExchange +Input [2]: [d_date_sk#124, d_year#125] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=17] + +(128) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_sold_date_sk#115] +Right keys [1]: [d_date_sk#124] +Join type: Inner +Join condition: None + +(129) Project [codegen id : 51] +Output [11]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125] +Input [13]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, ss_sold_date_sk#115, d_date_sk#124, d_year#125] + +(130) ReusedExchange [Reuses operator id: 44] +Output [3]: [s_store_sk#126, s_store_name#127, s_zip#128] + +(131) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_store_sk#110] +Right keys [1]: [s_store_sk#126] +Join type: Inner +Join condition: None + +(132) Project [codegen id : 51] +Output [12]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128] +Input [14]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_store_sk#110, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_sk#126, s_store_name#127, s_zip#128] + +(133) ReusedExchange [Reuses operator id: 50] +Output [6]: [c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(134) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_customer_sk#106] +Right keys [1]: [c_customer_sk#129] +Join type: Inner +Join condition: None + +(135) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] +Input [18]: [ss_item_sk#105, ss_customer_sk#106, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_customer_sk#129, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134] + +(136) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#135, d_year#136] + +(137) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_sales_date_sk#134] +Right keys [1]: [d_date_sk#135] +Join type: Inner +Join condition: None + +(138) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, c_first_sales_date_sk#134, d_date_sk#135, d_year#136] + +(139) ReusedExchange [Reuses operator id: 56] +Output [2]: [d_date_sk#137, d_year#138] + +(140) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_first_shipto_date_sk#133] +Right keys [1]: [d_date_sk#137] +Join type: Inner +Join condition: None + +(141) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, c_first_shipto_date_sk#133, d_year#136, d_date_sk#137, d_year#138] + +(142) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#139, cd_marital_status#140] + +(143) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_cdemo_sk#107] +Right keys [1]: [cd_demo_sk#139] +Join type: Inner +Join condition: None + +(144) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140] +Input [18]: [ss_item_sk#105, ss_cdemo_sk#107, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_demo_sk#139, cd_marital_status#140] + +(145) ReusedExchange [Reuses operator id: 65] +Output [2]: [cd_demo_sk#141, cd_marital_status#142] + +(146) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_cdemo_sk#130] +Right keys [1]: [cd_demo_sk#141] +Join type: Inner +Join condition: NOT (cd_marital_status#140 = cd_marital_status#142) + +(147) Project [codegen id : 51] +Output [14]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [18]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_cdemo_sk#130, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, cd_marital_status#140, cd_demo_sk#141, cd_marital_status#142] + +(148) ReusedExchange [Reuses operator id: 74] +Output [1]: [p_promo_sk#143] + +(149) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_promo_sk#111] +Right keys [1]: [p_promo_sk#143] +Join type: Inner +Join condition: None + +(150) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_promo_sk#111, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, p_promo_sk#143] + +(151) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#144, hd_income_band_sk#145] + +(152) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_hdemo_sk#108] +Right keys [1]: [hd_demo_sk#144] +Join type: Inner +Join condition: None + +(153) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145] +Input [15]: [ss_item_sk#105, ss_hdemo_sk#108, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_demo_sk#144, hd_income_band_sk#145] + +(154) ReusedExchange [Reuses operator id: 80] +Output [2]: [hd_demo_sk#146, hd_income_band_sk#147] + +(155) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_hdemo_sk#131] +Right keys [1]: [hd_demo_sk#146] +Join type: Inner +Join condition: None + +(156) Project [codegen id : 51] +Output [13]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147] +Input [15]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_hdemo_sk#131, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_demo_sk#146, hd_income_band_sk#147] + +(157) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(158) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_addr_sk#109] +Right keys [1]: [ca_address_sk#148] +Join type: Inner +Join condition: None + +(159) Project [codegen id : 51] +Output [16]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] +Input [18]: [ss_item_sk#105, ss_addr_sk#109, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_address_sk#148, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152] + +(160) ReusedExchange [Reuses operator id: 89] +Output [5]: [ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(161) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [c_current_addr_sk#132] +Right keys [1]: [ca_address_sk#153] +Join type: Inner +Join condition: None + +(162) Project [codegen id : 51] +Output [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [21]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, c_current_addr_sk#132, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_address_sk#153, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] + +(163) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#158] + +(164) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#145] +Right keys [1]: [ib_income_band_sk#158] +Join type: Inner +Join condition: None + +(165) Project [codegen id : 51] +Output [18]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [20]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#145, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#158] + +(166) ReusedExchange [Reuses operator id: 98] +Output [1]: [ib_income_band_sk#159] + +(167) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [hd_income_band_sk#147] +Right keys [1]: [ib_income_band_sk#159] +Join type: Inner +Join condition: None + +(168) Project [codegen id : 51] +Output [17]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, hd_income_band_sk#147, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, ib_income_band_sk#159] + +(169) ReusedExchange [Reuses operator id: 108] +Output [2]: [i_item_sk#160, i_product_name#161] + +(170) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [ss_item_sk#105] +Right keys [1]: [i_item_sk#160] +Join type: Inner +Join condition: None + +(171) Project [codegen id : 51] +Output [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Input [19]: [ss_item_sk#105, ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, s_store_name#127, s_zip#128, d_year#136, d_year#138, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] + +(172) HashAggregate [codegen id : 51] +Input [18]: [ss_wholesale_cost#112, ss_list_price#113, ss_coupon_amt#114, d_year#125, d_year#136, d_year#138, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, i_item_sk#160, i_product_name#161] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#112)), partial_sum(UnscaledValue(ss_list_price#113)), partial_sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count#76, sum#162, sum#163, sum#164] +Results [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] + +(173) HashAggregate [codegen id : 51] +Input [19]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138, count#80, sum#165, sum#166, sum#167] +Keys [15]: [i_product_name#161, i_item_sk#160, s_store_name#127, s_zip#128, ca_street_number#149, ca_street_name#150, ca_city#151, ca_zip#152, ca_street_number#154, ca_street_name#155, ca_city#156, ca_zip#157, d_year#125, d_year#136, d_year#138] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#112)), sum(UnscaledValue(ss_list_price#113)), sum(UnscaledValue(ss_coupon_amt#114))] +Aggregate Attributes [4]: [count(1)#84, sum(UnscaledValue(ss_wholesale_cost#112))#85, sum(UnscaledValue(ss_list_price#113))#86, sum(UnscaledValue(ss_coupon_amt#114))#87] +Results [8]: [i_item_sk#160 AS item_sk#168, s_store_name#127 AS store_name#169, s_zip#128 AS store_zip#170, d_year#125 AS syear#171, count(1)#84 AS cnt#172, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#112))#85,17,2) AS s1#173, MakeDecimal(sum(UnscaledValue(ss_list_price#113))#86,17,2) AS s2#174, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#114))#87,17,2) AS s3#175] + +(174) Exchange +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: hashpartitioning(item_sk#168, store_name#169, store_zip#170, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(175) Sort [codegen id : 52] +Input [8]: [item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] +Arguments: [item_sk#168 ASC NULLS FIRST, store_name#169 ASC NULLS FIRST, store_zip#170 ASC NULLS FIRST], false, 0 + +(176) SortMergeJoin [codegen id : 53] +Left keys [3]: [item_sk#89, store_name#90, store_zip#91] +Right keys [3]: [item_sk#168, store_name#169, store_zip#170] +Join type: Inner +Join condition: (cnt#172 <= cnt#101) + +(177) Project [codegen id : 53] +Output [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Input [25]: [product_name#88, item_sk#89, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, item_sk#168, store_name#169, store_zip#170, syear#171, cnt#172, s1#173, s2#174, s3#175] + +(178) Exchange +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: rangepartitioning(product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST, s1#102 ASC NULLS FIRST, s1#173 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(179) Sort [codegen id : 54] +Input [21]: [product_name#88, store_name#90, store_zip#91, b_street_number#92, b_streen_name#93, b_city#94, b_zip#95, c_street_number#96, c_street_name#97, c_city#98, c_zip#99, syear#100, cnt#101, s1#102, s2#103, s3#104, s1#173, s2#174, s3#175, syear#171, cnt#172] +Arguments: [product_name#88 ASC NULLS FIRST, store_name#90 ASC NULLS FIRST, cnt#172 ASC NULLS FIRST, s1#102 ASC NULLS FIRST, s1#173 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..c010e0aea0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.native_iceberg_compat/simplified.txt @@ -0,0 +1,270 @@ +WholeStageCodegen (54) + Sort [product_name,store_name,cnt,s1,s1] + InputAdapter + Exchange [product_name,store_name,cnt,s1,s1] #1 + WholeStageCodegen (53) + Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + SortMergeJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] + InputAdapter + WholeStageCodegen (26) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #2 + WholeStageCodegen (25) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #3 + WholeStageCodegen (2) + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (8) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #6 + WholeStageCodegen (4) + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (7) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #7 + WholeStageCodegen (6) + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit,cr_returned_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (10) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (11) + Filter [s_store_sk,s_store_name,s_zip] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (12) + Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (13) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (15) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (17) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (18) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (20) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (22) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.income_band [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (24) + Project [i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + WholeStageCodegen (52) + Sort [item_sk,store_name,store_zip] + InputAdapter + Exchange [item_sk,store_name,store_zip] #18 + WholeStageCodegen (51) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] + SortMergeJoin [ss_item_sk,cs_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,ss_sold_date_sk] #3 + InputAdapter + WholeStageCodegen (35) + Sort [cs_item_sk] + Project [cs_item_sk] + Filter [sale,refund] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)),sale,refund,sum,sum,isEmpty] + InputAdapter + ReusedExchange [cs_item_sk,sum,sum,isEmpty] #5 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (36) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] #9 + InputAdapter + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] #12 + InputAdapter + ReusedExchange [p_promo_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] #14 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [ib_income_band_sk] #16 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] #17 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/explain.txt new file mode 100644 index 0000000000..3a48ca867b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/explain.txt @@ -0,0 +1,441 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- Window (69) + +- * Sort (68) + +- Exchange (67) + +- Union (66) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.item (17) + :- * HashAggregate (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- * HashAggregate (27) + : +- ReusedExchange (26) + :- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * HashAggregate (32) + : +- ReusedExchange (31) + :- * HashAggregate (40) + : +- Exchange (39) + : +- * HashAggregate (38) + : +- * HashAggregate (37) + : +- ReusedExchange (36) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * HashAggregate (42) + : +- ReusedExchange (41) + :- * HashAggregate (50) + : +- Exchange (49) + : +- * HashAggregate (48) + : +- * HashAggregate (47) + : +- ReusedExchange (46) + :- * HashAggregate (55) + : +- Exchange (54) + : +- * HashAggregate (53) + : +- * HashAggregate (52) + : +- ReusedExchange (51) + :- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * HashAggregate (57) + : +- ReusedExchange (56) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- ReusedExchange (61) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1212)) AND (d_month_seq#7 <= 1223)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_id#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(14) BroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] + +(17) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(20) BroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(23) HashAggregate [codegen id : 4] +Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] + +(24) Exchange +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 as decimal(38,2)) AS sumsales#23] + +(26) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] + +(27) HashAggregate [codegen id : 10] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(28) HashAggregate [codegen id : 10] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sumsales#26] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] + +(29) Exchange +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(30) HashAggregate [codegen id : 11] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#31] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#32, sum(sumsales#26)#31 AS sumsales#33] + +(31) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] + +(32) HashAggregate [codegen id : 16] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(33) HashAggregate [codegen id : 16] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sumsales#26] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] + +(34) Exchange +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) HashAggregate [codegen id : 17] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#40] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null AS d_moy#41, null AS s_store_id#42, sum(sumsales#26)#40 AS sumsales#43] + +(36) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] + +(37) HashAggregate [codegen id : 22] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(38) HashAggregate [codegen id : 22] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sumsales#26] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] + +(39) Exchange +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(40) HashAggregate [codegen id : 23] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#50] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null AS d_qoy#51, null AS d_moy#52, null AS s_store_id#53, sum(sumsales#26)#50 AS sumsales#54] + +(41) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] + +(42) HashAggregate [codegen id : 28] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(43) HashAggregate [codegen id : 28] +Input [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sumsales#26] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#57, isEmpty#58] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] + +(44) Exchange +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 29] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#61] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, null AS d_year#62, null AS d_qoy#63, null AS d_moy#64, null AS s_store_id#65, sum(sumsales#26)#61 AS sumsales#66] + +(46) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] + +(47) HashAggregate [codegen id : 34] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [4]: [i_category#16, i_class#15, i_brand#14, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(48) HashAggregate [codegen id : 34] +Input [4]: [i_category#16, i_class#15, i_brand#14, sumsales#26] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#69, isEmpty#70] +Results [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] + +(49) Exchange +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(50) HashAggregate [codegen id : 35] +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#73] +Results [9]: [i_category#16, i_class#15, i_brand#14, null AS i_product_name#74, null AS d_year#75, null AS d_qoy#76, null AS d_moy#77, null AS s_store_id#78, sum(sumsales#26)#73 AS sumsales#79] + +(51) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] + +(52) HashAggregate [codegen id : 40] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [3]: [i_category#16, i_class#15, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(53) HashAggregate [codegen id : 40] +Input [3]: [i_category#16, i_class#15, sumsales#26] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#82, isEmpty#83] +Results [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] + +(54) Exchange +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Arguments: hashpartitioning(i_category#16, i_class#15, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(55) HashAggregate [codegen id : 41] +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#86] +Results [9]: [i_category#16, i_class#15, null AS i_brand#87, null AS i_product_name#88, null AS d_year#89, null AS d_qoy#90, null AS d_moy#91, null AS s_store_id#92, sum(sumsales#26)#86 AS sumsales#93] + +(56) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] + +(57) HashAggregate [codegen id : 46] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [2]: [i_category#16, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(58) HashAggregate [codegen id : 46] +Input [2]: [i_category#16, sumsales#26] +Keys [1]: [i_category#16] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#96, isEmpty#97] +Results [3]: [i_category#16, sum#98, isEmpty#99] + +(59) Exchange +Input [3]: [i_category#16, sum#98, isEmpty#99] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(60) HashAggregate [codegen id : 47] +Input [3]: [i_category#16, sum#98, isEmpty#99] +Keys [1]: [i_category#16] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#100] +Results [9]: [i_category#16, null AS i_class#101, null AS i_brand#102, null AS i_product_name#103, null AS d_year#104, null AS d_qoy#105, null AS d_moy#106, null AS s_store_id#107, sum(sumsales#26)#100 AS sumsales#108] + +(61) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] + +(62) HashAggregate [codegen id : 52] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(63) HashAggregate [codegen id : 52] +Input [1]: [sumsales#26] +Keys: [] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#111, isEmpty#112] +Results [2]: [sum#113, isEmpty#114] + +(64) Exchange +Input [2]: [sum#113, isEmpty#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(65) HashAggregate [codegen id : 53] +Input [2]: [sum#113, isEmpty#114] +Keys: [] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#115] +Results [9]: [null AS i_category#116, null AS i_class#117, null AS i_brand#118, null AS i_product_name#119, null AS d_year#120, null AS d_qoy#121, null AS d_moy#122, null AS s_store_id#123, sum(sumsales#26)#115 AS sumsales#124] + +(66) Union + +(67) Exchange +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(68) Sort [codegen id : 54] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [i_category#16 ASC NULLS FIRST, sumsales#23 DESC NULLS LAST], false, 0 + +(69) Window +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [rank(sumsales#23) windowspecdefinition(i_category#16, sumsales#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#125], [i_category#16], [sumsales#23 DESC NULLS LAST] + +(70) Filter [codegen id : 55] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Condition : (rk#125 <= 100) + +(71) TakeOrderedAndProject +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Arguments: 100, [i_category#16 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#23 ASC NULLS FIRST, rk#125 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..51f848bf7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_datafusion/simplified.txt @@ -0,0 +1,120 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (55) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WholeStageCodegen (54) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + Union + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + WholeStageCodegen (11) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sum,isEmpty] [sum(sumsales),s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #6 + WholeStageCodegen (10) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (17) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sum,isEmpty] [sum(sumsales),d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #7 + WholeStageCodegen (16) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (23) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sum,isEmpty] [sum(sumsales),d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year] #8 + WholeStageCodegen (22) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (29) + HashAggregate [i_category,i_class,i_brand,i_product_name,sum,isEmpty] [sum(sumsales),d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name] #9 + WholeStageCodegen (28) + HashAggregate [i_category,i_class,i_brand,i_product_name,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (35) + HashAggregate [i_category,i_class,i_brand,sum,isEmpty] [sum(sumsales),i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand] #10 + WholeStageCodegen (34) + HashAggregate [i_category,i_class,i_brand,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (41) + HashAggregate [i_category,i_class,sum,isEmpty] [sum(sumsales),i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class] #11 + WholeStageCodegen (40) + HashAggregate [i_category,i_class,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (47) + HashAggregate [i_category,sum,isEmpty] [sum(sumsales),i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category] #12 + WholeStageCodegen (46) + HashAggregate [i_category,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (53) + HashAggregate [sum,isEmpty] [sum(sumsales),i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange #13 + WholeStageCodegen (52) + HashAggregate [sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..3a48ca867b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/explain.txt @@ -0,0 +1,441 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Filter (70) + +- Window (69) + +- * Sort (68) + +- Exchange (67) + +- Union (66) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet spark_catalog.default.item (17) + :- * HashAggregate (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- * HashAggregate (27) + : +- ReusedExchange (26) + :- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * HashAggregate (32) + : +- ReusedExchange (31) + :- * HashAggregate (40) + : +- Exchange (39) + : +- * HashAggregate (38) + : +- * HashAggregate (37) + : +- ReusedExchange (36) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * HashAggregate (42) + : +- ReusedExchange (41) + :- * HashAggregate (50) + : +- Exchange (49) + : +- * HashAggregate (48) + : +- * HashAggregate (47) + : +- ReusedExchange (46) + :- * HashAggregate (55) + : +- Exchange (54) + : +- * HashAggregate (53) + : +- * HashAggregate (52) + : +- ReusedExchange (51) + :- * HashAggregate (60) + : +- Exchange (59) + : +- * HashAggregate (58) + : +- * HashAggregate (57) + : +- ReusedExchange (56) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- ReusedExchange (61) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#5)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) + +(4) Scan parquet spark_catalog.default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1212)) AND (d_month_seq#7 <= 1223)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, ss_sold_date_sk#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#11, s_store_id#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#11, s_store_id#12] +Condition : isnotnull(s_store_sk#11) + +(14) BroadcastExchange +Input [2]: [s_store_sk#11, s_store_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#11] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] + +(17) Scan parquet spark_catalog.default.item +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) + +(20) BroadcastExchange +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#13] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] + +(23) HashAggregate [codegen id : 4] +Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [partial_sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] + +(24) Exchange +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(25) HashAggregate [codegen id : 5] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 as decimal(38,2)) AS sumsales#23] + +(26) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] + +(27) HashAggregate [codegen id : 10] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(28) HashAggregate [codegen id : 10] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sumsales#26] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] + +(29) Exchange +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(30) HashAggregate [codegen id : 11] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#31] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#32, sum(sumsales#26)#31 AS sumsales#33] + +(31) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] + +(32) HashAggregate [codegen id : 16] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(33) HashAggregate [codegen id : 16] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sumsales#26] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] + +(34) Exchange +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) HashAggregate [codegen id : 17] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#40] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null AS d_moy#41, null AS s_store_id#42, sum(sumsales#26)#40 AS sumsales#43] + +(36) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] + +(37) HashAggregate [codegen id : 22] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(38) HashAggregate [codegen id : 22] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sumsales#26] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] + +(39) Exchange +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(40) HashAggregate [codegen id : 23] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#50] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null AS d_qoy#51, null AS d_moy#52, null AS s_store_id#53, sum(sumsales#26)#50 AS sumsales#54] + +(41) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] + +(42) HashAggregate [codegen id : 28] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(43) HashAggregate [codegen id : 28] +Input [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sumsales#26] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#57, isEmpty#58] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] + +(44) Exchange +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(45) HashAggregate [codegen id : 29] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#61] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, null AS d_year#62, null AS d_qoy#63, null AS d_moy#64, null AS s_store_id#65, sum(sumsales#26)#61 AS sumsales#66] + +(46) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] + +(47) HashAggregate [codegen id : 34] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [4]: [i_category#16, i_class#15, i_brand#14, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(48) HashAggregate [codegen id : 34] +Input [4]: [i_category#16, i_class#15, i_brand#14, sumsales#26] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#69, isEmpty#70] +Results [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] + +(49) Exchange +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(50) HashAggregate [codegen id : 35] +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#73] +Results [9]: [i_category#16, i_class#15, i_brand#14, null AS i_product_name#74, null AS d_year#75, null AS d_qoy#76, null AS d_moy#77, null AS s_store_id#78, sum(sumsales#26)#73 AS sumsales#79] + +(51) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] + +(52) HashAggregate [codegen id : 40] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [3]: [i_category#16, i_class#15, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(53) HashAggregate [codegen id : 40] +Input [3]: [i_category#16, i_class#15, sumsales#26] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#82, isEmpty#83] +Results [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] + +(54) Exchange +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Arguments: hashpartitioning(i_category#16, i_class#15, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(55) HashAggregate [codegen id : 41] +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#86] +Results [9]: [i_category#16, i_class#15, null AS i_brand#87, null AS i_product_name#88, null AS d_year#89, null AS d_qoy#90, null AS d_moy#91, null AS s_store_id#92, sum(sumsales#26)#86 AS sumsales#93] + +(56) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] + +(57) HashAggregate [codegen id : 46] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [2]: [i_category#16, sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(58) HashAggregate [codegen id : 46] +Input [2]: [i_category#16, sumsales#26] +Keys [1]: [i_category#16] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#96, isEmpty#97] +Results [3]: [i_category#16, sum#98, isEmpty#99] + +(59) Exchange +Input [3]: [i_category#16, sum#98, isEmpty#99] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(60) HashAggregate [codegen id : 47] +Input [3]: [i_category#16, sum#98, isEmpty#99] +Keys [1]: [i_category#16] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#100] +Results [9]: [i_category#16, null AS i_class#101, null AS i_brand#102, null AS i_product_name#103, null AS d_year#104, null AS d_qoy#105, null AS d_moy#106, null AS s_store_id#107, sum(sumsales#26)#100 AS sumsales#108] + +(61) ReusedExchange [Reuses operator id: 24] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] + +(62) HashAggregate [codegen id : 52] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Functions [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))] +Aggregate Attributes [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22] +Results [1]: [sum(coalesce((ss_sales_price#4 * cast(ss_quantity#3 as decimal(10,0))), 0.00))#22 AS sumsales#26] + +(63) HashAggregate [codegen id : 52] +Input [1]: [sumsales#26] +Keys: [] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#111, isEmpty#112] +Results [2]: [sum#113, isEmpty#114] + +(64) Exchange +Input [2]: [sum#113, isEmpty#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] + +(65) HashAggregate [codegen id : 53] +Input [2]: [sum#113, isEmpty#114] +Keys: [] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#115] +Results [9]: [null AS i_category#116, null AS i_class#117, null AS i_brand#118, null AS i_product_name#119, null AS d_year#120, null AS d_qoy#121, null AS d_moy#122, null AS s_store_id#123, sum(sumsales#26)#115 AS sumsales#124] + +(66) Union + +(67) Exchange +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(68) Sort [codegen id : 54] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [i_category#16 ASC NULLS FIRST, sumsales#23 DESC NULLS LAST], false, 0 + +(69) Window +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [rank(sumsales#23) windowspecdefinition(i_category#16, sumsales#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#125], [i_category#16], [sumsales#23 DESC NULLS LAST] + +(70) Filter [codegen id : 55] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Condition : (rk#125 <= 100) + +(71) TakeOrderedAndProject +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Arguments: 100, [i_category#16 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#23 ASC NULLS FIRST, rk#125 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..51f848bf7b --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.native_iceberg_compat/simplified.txt @@ -0,0 +1,120 @@ +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (55) + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WholeStageCodegen (54) + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + Union + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + WholeStageCodegen (11) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sum,isEmpty] [sum(sumsales),s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy] #6 + WholeStageCodegen (10) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (17) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sum,isEmpty] [sum(sumsales),d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy] #7 + WholeStageCodegen (16) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (23) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sum,isEmpty] [sum(sumsales),d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name,d_year] #8 + WholeStageCodegen (22) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (29) + HashAggregate [i_category,i_class,i_brand,i_product_name,sum,isEmpty] [sum(sumsales),d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand,i_product_name] #9 + WholeStageCodegen (28) + HashAggregate [i_category,i_class,i_brand,i_product_name,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (35) + HashAggregate [i_category,i_class,i_brand,sum,isEmpty] [sum(sumsales),i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class,i_brand] #10 + WholeStageCodegen (34) + HashAggregate [i_category,i_class,i_brand,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (41) + HashAggregate [i_category,i_class,sum,isEmpty] [sum(sumsales),i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category,i_class] #11 + WholeStageCodegen (40) + HashAggregate [i_category,i_class,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (47) + HashAggregate [i_category,sum,isEmpty] [sum(sumsales),i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange [i_category] #12 + WholeStageCodegen (46) + HashAggregate [i_category,sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 + WholeStageCodegen (53) + HashAggregate [sum,isEmpty] [sum(sumsales),i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,sum,isEmpty] + InputAdapter + Exchange #13 + WholeStageCodegen (52) + HashAggregate [sumsales] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] [sum(coalesce((ss_sales_price * cast(ss_quantity as decimal(10,0))), 0.00)),sumsales,sum,isEmpty] + InputAdapter + ReusedExchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/explain.txt new file mode 100644 index 0000000000..e0548b3cc1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/explain.txt @@ -0,0 +1,351 @@ +== Physical Plan == +TakeOrderedAndProject (59) ++- * Project (58) + +- Window (57) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- Union (51) + :- * HashAggregate (40) + : +- Exchange (39) + : +- * HashAggregate (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (35) + : +- * BroadcastHashJoin LeftSemi BuildRight (34) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (33) + : +- * Project (32) + : +- * Filter (31) + : +- Window (30) + : +- * Sort (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet spark_catalog.default.store_sales (14) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.store (17) + : +- ReusedExchange (23) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * HashAggregate (42) + : +- ReusedExchange (41) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- ReusedExchange (46) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#1, ss_net_profit#2] +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) Filter [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(14) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(16) Filter [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_state#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(20) BroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#9] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#14] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#10, s_state#13] +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] + +(27) Exchange +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#13, sum#16] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w0#18, s_state#13] + +(29) Sort [codegen id : 5] +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [s_state#13 ASC NULLS FIRST, _w0#18 DESC NULLS LAST], false, 0 + +(30) Window +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [rank(_w0#18) windowspecdefinition(s_state#13, _w0#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w0#18 DESC NULLS LAST] + +(31) Filter [codegen id : 6] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] +Condition : (ranking#19 <= 5) + +(32) Project [codegen id : 6] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] + +(33) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(35) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 8] +Output [3]: [ss_net_profit#2, s_county#7, s_state#8] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_net_profit#2, s_county#7, s_state#8] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#20] +Results [3]: [s_state#8, s_county#7, sum#21] + +(39) Exchange +Input [3]: [s_state#8, s_county#7, sum#21] +Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(40) HashAggregate [codegen id : 9] +Input [3]: [s_state#8, s_county#7, sum#21] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) as decimal(27,2)) AS total_sum#23, s_state#8, s_county#7, 0 AS g_state#24, 0 AS g_county#25, 0 AS lochierarchy#26] + +(41) ReusedExchange [Reuses operator id: 39] +Output [3]: [s_state#8, s_county#7, sum#27] + +(42) HashAggregate [codegen id : 18] +Input [3]: [s_state#8, s_county#7, sum#27] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28, s_state#8] + +(43) HashAggregate [codegen id : 18] +Input [2]: [total_sum#28, s_state#8] +Keys [1]: [s_state#8] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [3]: [s_state#8, sum#31, isEmpty#32] + +(44) Exchange +Input [3]: [s_state#8, sum#31, isEmpty#32] +Arguments: hashpartitioning(s_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 19] +Input [3]: [s_state#8, sum#31, isEmpty#32] +Keys [1]: [s_state#8] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#33] +Results [6]: [sum(total_sum#28)#33 AS total_sum#34, s_state#8, null AS s_county#35, 0 AS g_state#36, 1 AS g_county#37, 1 AS lochierarchy#38] + +(46) ReusedExchange [Reuses operator id: 39] +Output [3]: [s_state#8, s_county#7, sum#39] + +(47) HashAggregate [codegen id : 28] +Input [3]: [s_state#8, s_county#7, sum#39] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28] + +(48) HashAggregate [codegen id : 28] +Input [1]: [total_sum#28] +Keys: [] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [2]: [sum#42, isEmpty#43] + +(49) Exchange +Input [2]: [sum#42, isEmpty#43] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 29] +Input [2]: [sum#42, isEmpty#43] +Keys: [] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#44] +Results [6]: [sum(total_sum#28)#44 AS total_sum#45, null AS s_state#46, null AS s_county#47, 1 AS g_state#48, 1 AS g_county#49, 2 AS lochierarchy#50] + +(51) Union + +(52) HashAggregate [codegen id : 30] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] + +(53) Exchange +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Arguments: hashpartitioning(total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(54) HashAggregate [codegen id : 31] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, CASE WHEN (g_county#25 = 0) THEN s_state#8 END AS _w0#51] + +(55) Exchange +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: hashpartitioning(lochierarchy#26, _w0#51, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(56) Sort [codegen id : 32] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#51 ASC NULLS FIRST, total_sum#23 DESC NULLS LAST], false, 0 + +(57) Window +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [rank(total_sum#23) windowspecdefinition(lochierarchy#26, _w0#51, total_sum#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#52], [lochierarchy#26, _w0#51], [total_sum#23 DESC NULLS LAST] + +(58) Project [codegen id : 33] +Output [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Input [6]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51, rank_within_parent#52] + +(59) TakeOrderedAndProject +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#52 ASC NULLS FIRST], [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..4a20e8104c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_datafusion/simplified.txt @@ -0,0 +1,96 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (33) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (32) + Sort [lochierarchy,_w0,total_sum] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (31) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] + InputAdapter + Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 + WholeStageCodegen (30) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + InputAdapter + Union + WholeStageCodegen (9) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + InputAdapter + Exchange [s_state,s_county] #3 + WholeStageCodegen (8) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WholeStageCodegen (5) + Sort [s_state,_w0] + HashAggregate [sum] [sum(UnscaledValue(ss_net_profit)),_w0,s_state,sum] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_net_profit,ss_sold_date_sk,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (19) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #9 + WholeStageCodegen (18) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (29) + HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange #10 + WholeStageCodegen (28) + HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..e0548b3cc1 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/explain.txt @@ -0,0 +1,351 @@ +== Physical Plan == +TakeOrderedAndProject (59) ++- * Project (58) + +- Window (57) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- Union (51) + :- * HashAggregate (40) + : +- Exchange (39) + : +- * HashAggregate (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (35) + : +- * BroadcastHashJoin LeftSemi BuildRight (34) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet spark_catalog.default.store (11) + : +- BroadcastExchange (33) + : +- * Project (32) + : +- * Filter (31) + : +- Window (30) + : +- * Sort (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet spark_catalog.default.store_sales (14) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet spark_catalog.default.store (17) + : +- ReusedExchange (23) + :- * HashAggregate (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- * HashAggregate (42) + : +- ReusedExchange (41) + +- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- ReusedExchange (46) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#1, ss_net_profit#2] +Input [4]: [ss_store_sk#1, ss_net_profit#2, ss_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.store +Output [3]: [s_store_sk#6, s_county#7, s_state#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] + +(13) Filter [codegen id : 7] +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Condition : isnotnull(s_store_sk#6) + +(14) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] + +(16) Filter [codegen id : 4] +Input [3]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_store_sk#9) + +(17) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#12, s_state#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_state#13] +Condition : isnotnull(s_store_sk#12) + +(20) BroadcastExchange +Input [2]: [s_store_sk#12, s_state#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#9] +Right keys [1]: [s_store_sk#12] +Join type: Inner +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] +Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#14] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#14] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#10, s_state#13] +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#10, s_state#13] +Keys [1]: [s_state#13] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] + +(27) Exchange +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#13, sum#16] +Keys [1]: [s_state#13] +Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w0#18, s_state#13] + +(29) Sort [codegen id : 5] +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [s_state#13 ASC NULLS FIRST, _w0#18 DESC NULLS LAST], false, 0 + +(30) Window +Input [3]: [s_state#13, _w0#18, s_state#13] +Arguments: [rank(_w0#18) windowspecdefinition(s_state#13, _w0#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w0#18 DESC NULLS LAST] + +(31) Filter [codegen id : 6] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] +Condition : (ranking#19 <= 5) + +(32) Project [codegen id : 6] +Output [1]: [s_state#13] +Input [4]: [s_state#13, _w0#18, s_state#13, ranking#19] + +(33) BroadcastExchange +Input [1]: [s_state#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [s_state#8] +Right keys [1]: [s_state#13] +Join type: LeftSemi +Join condition: None + +(35) BroadcastExchange +Input [3]: [s_store_sk#6, s_county#7, s_state#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#6] +Join type: Inner +Join condition: None + +(37) Project [codegen id : 8] +Output [3]: [ss_net_profit#2, s_county#7, s_state#8] +Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_net_profit#2, s_county#7, s_state#8] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum#20] +Results [3]: [s_state#8, s_county#7, sum#21] + +(39) Exchange +Input [3]: [s_state#8, s_county#7, sum#21] +Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(40) HashAggregate [codegen id : 9] +Input [3]: [s_state#8, s_county#7, sum#21] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) as decimal(27,2)) AS total_sum#23, s_state#8, s_county#7, 0 AS g_state#24, 0 AS g_county#25, 0 AS lochierarchy#26] + +(41) ReusedExchange [Reuses operator id: 39] +Output [3]: [s_state#8, s_county#7, sum#27] + +(42) HashAggregate [codegen id : 18] +Input [3]: [s_state#8, s_county#7, sum#27] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28, s_state#8] + +(43) HashAggregate [codegen id : 18] +Input [2]: [total_sum#28, s_state#8] +Keys [1]: [s_state#8] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [3]: [s_state#8, sum#31, isEmpty#32] + +(44) Exchange +Input [3]: [s_state#8, sum#31, isEmpty#32] +Arguments: hashpartitioning(s_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(45) HashAggregate [codegen id : 19] +Input [3]: [s_state#8, sum#31, isEmpty#32] +Keys [1]: [s_state#8] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#33] +Results [6]: [sum(total_sum#28)#33 AS total_sum#34, s_state#8, null AS s_county#35, 0 AS g_state#36, 1 AS g_county#37, 1 AS lochierarchy#38] + +(46) ReusedExchange [Reuses operator id: 39] +Output [3]: [s_state#8, s_county#7, sum#39] + +(47) HashAggregate [codegen id : 28] +Input [3]: [s_state#8, s_county#7, sum#39] +Keys [2]: [s_state#8, s_county#7] +Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28] + +(48) HashAggregate [codegen id : 28] +Input [1]: [total_sum#28] +Keys: [] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [2]: [sum#42, isEmpty#43] + +(49) Exchange +Input [2]: [sum#42, isEmpty#43] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(50) HashAggregate [codegen id : 29] +Input [2]: [sum#42, isEmpty#43] +Keys: [] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#44] +Results [6]: [sum(total_sum#28)#44 AS total_sum#45, null AS s_state#46, null AS s_county#47, 1 AS g_state#48, 1 AS g_county#49, 2 AS lochierarchy#50] + +(51) Union + +(52) HashAggregate [codegen id : 30] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] + +(53) Exchange +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Arguments: hashpartitioning(total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(54) HashAggregate [codegen id : 31] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, CASE WHEN (g_county#25 = 0) THEN s_state#8 END AS _w0#51] + +(55) Exchange +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: hashpartitioning(lochierarchy#26, _w0#51, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(56) Sort [codegen id : 32] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#51 ASC NULLS FIRST, total_sum#23 DESC NULLS LAST], false, 0 + +(57) Window +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [rank(total_sum#23) windowspecdefinition(lochierarchy#26, _w0#51, total_sum#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#52], [lochierarchy#26, _w0#51], [total_sum#23 DESC NULLS LAST] + +(58) Project [codegen id : 33] +Output [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Input [6]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51, rank_within_parent#52] + +(59) TakeOrderedAndProject +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#52 ASC NULLS FIRST], [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..4a20e8104c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.native_iceberg_compat/simplified.txt @@ -0,0 +1,96 @@ +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (33) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (32) + Sort [lochierarchy,_w0,total_sum] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (31) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] + InputAdapter + Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 + WholeStageCodegen (30) + HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + InputAdapter + Union + WholeStageCodegen (9) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + InputAdapter + Exchange [s_state,s_county] #3 + WholeStageCodegen (8) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (7) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w0,s_state] + WholeStageCodegen (5) + Sort [s_state,_w0] + HashAggregate [sum] [sum(UnscaledValue(ss_net_profit)),_w0,s_state,sum] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_net_profit,ss_sold_date_sk,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (19) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #9 + WholeStageCodegen (18) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (29) + HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange #10 + WholeStageCodegen (28) + HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/explain.txt new file mode 100644 index 0000000000..9d2596a538 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (74) ++- * HashAggregate (73) + +- Exchange (72) + +- * HashAggregate (71) + +- * Project (70) + +- * SortMergeJoin LeftOuter (69) + :- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet spark_catalog.default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet spark_catalog.default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet spark_catalog.default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet spark_catalog.default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet spark_catalog.default.date_dim (43) + : : +- BroadcastExchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet spark_catalog.default.promotion (55) + +- * Sort (68) + +- Exchange (67) + +- * Project (66) + +- * Filter (65) + +- * ColumnarToRow (64) + +- Scan parquet spark_catalog.default.catalog_returns (63) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#12)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(7) BroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [inv_item_sk#9] +Join type: Inner +Join condition: (inv_quantity_on_hand#11 < cs_quantity#7) + +(9) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_warehouse_sk#10] +Right keys [1]: [w_warehouse_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] + +(16) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_desc#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#15) + +(19) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_desc#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 10] +Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] + +(22) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = M)) AND isnotnull(cd_demo_sk#17)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#17] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] + +(29) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000 ), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = 1001-5000 )) AND isnotnull(hd_demo_sk#19)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#19] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#19] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] + +(36) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 2001)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(40) BroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] + +(43) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#25, d_week_seq#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [d_week_seq#23, inv_date_sk#12] +Right keys [2]: [d_week_seq#26, d_date_sk#25] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] + +(49) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_date#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(53) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: (d_date#28 > date_add(d_date#22, 5)) + +(54) Project [codegen id : 10] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] + +(55) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#29] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_promo_sk#5] +Right keys [1]: [p_promo_sk#29] +Join type: LeftOuter +Join condition: None + +(60) Project [codegen id : 10] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] + +(61) Exchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(62) Sort [codegen id : 11] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 + +(63) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(65) Filter [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(66) Project [codegen id : 12] +Output [2]: [cr_item_sk#30, cr_order_number#31] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(67) Exchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(68) Sort [codegen id : 13] +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin [codegen id : 14] +Left keys [2]: [cs_item_sk#4, cs_order_number#6] +Right keys [2]: [cr_item_sk#30, cr_order_number#31] +Join type: LeftOuter +Join condition: None + +(70) Project [codegen id : 14] +Output [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] + +(71) HashAggregate [codegen id : 14] +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#33] +Results [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] + +(72) Exchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(73) HashAggregate [codegen id : 15] +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count(1)#35 AS no_promo#36, count(1)#35 AS promo#37, count(1)#35 AS total_cnt#38] + +(74) TakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] +Arguments: 100, [total_cnt#38 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/simplified.txt new file mode 100644 index 0000000000..37b3b330ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_datafusion/simplified.txt @@ -0,0 +1,114 @@ +TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] + WholeStageCodegen (15) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] + InputAdapter + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen (14) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (10) + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + WholeStageCodegen (13) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (12) + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..9d2596a538 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/explain.txt @@ -0,0 +1,423 @@ +== Physical Plan == +TakeOrderedAndProject (74) ++- * HashAggregate (73) + +- Exchange (72) + +- * HashAggregate (71) + +- * Project (70) + +- * SortMergeJoin LeftOuter (69) + :- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet spark_catalog.default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet spark_catalog.default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet spark_catalog.default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet spark_catalog.default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet spark_catalog.default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet spark_catalog.default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet spark_catalog.default.date_dim (43) + : : +- BroadcastExchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet spark_catalog.default.promotion (55) + +- * Sort (68) + +- Exchange (67) + +- * Project (66) + +- * Filter (65) + +- * ColumnarToRow (64) + +- Scan parquet spark_catalog.default.catalog_returns (63) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#8)] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] + +(3) Filter [codegen id : 10] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] +Condition : ((((isnotnull(cs_quantity#7) AND isnotnull(cs_item_sk#4)) AND isnotnull(cs_bill_cdemo_sk#2)) AND isnotnull(cs_bill_hdemo_sk#3)) AND isnotnull(cs_ship_date_sk#1)) + +(4) Scan parquet spark_catalog.default.inventory +Output [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(inv_date_sk#12)] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Condition : ((isnotnull(inv_quantity_on_hand#11) AND isnotnull(inv_item_sk#9)) AND isnotnull(inv_warehouse_sk#10)) + +(7) BroadcastExchange +Input [4]: [inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [inv_item_sk#9] +Join type: Inner +Join condition: (inv_quantity_on_hand#11 < cs_quantity#7) + +(9) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12] +Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#9, inv_warehouse_sk#10, inv_quantity_on_hand#11, inv_date_sk#12] + +(10) Scan parquet spark_catalog.default.warehouse +Output [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Condition : isnotnull(w_warehouse_sk#13) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#13, w_warehouse_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [inv_warehouse_sk#10] +Right keys [1]: [w_warehouse_sk#13] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#10, inv_date_sk#12, w_warehouse_sk#13, w_warehouse_name#14] + +(16) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#15, i_item_desc#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#15, i_item_desc#16] +Condition : isnotnull(i_item_sk#15) + +(19) BroadcastExchange +Input [2]: [i_item_sk#15, i_item_desc#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(20) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#15] +Join type: Inner +Join condition: None + +(21) Project [codegen id : 10] +Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_sk#15, i_item_desc#16] + +(22) Scan parquet spark_catalog.default.customer_demographics +Output [2]: [cd_demo_sk#17, cd_marital_status#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] +Condition : ((isnotnull(cd_marital_status#18) AND (cd_marital_status#18 = M)) AND isnotnull(cd_demo_sk#17)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#17] +Input [2]: [cd_demo_sk#17, cd_marital_status#18] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] + +(27) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#17] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, cd_demo_sk#17] + +(29) Scan parquet spark_catalog.default.household_demographics +Output [2]: [hd_demo_sk#19, hd_buy_potential#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000 ), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] +Condition : ((isnotnull(hd_buy_potential#20) AND (hd_buy_potential#20 = 1001-5000 )) AND isnotnull(hd_demo_sk#19)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#19] +Input [2]: [hd_demo_sk#19, hd_buy_potential#20] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_bill_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#19] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16] +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, hd_demo_sk#19] + +(36) Scan parquet spark_catalog.default.date_dim +Output [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] +Condition : ((((isnotnull(d_year#24) AND (d_year#24 = 2001)) AND isnotnull(d_date_sk#21)) AND isnotnull(d_week_seq#23)) AND isnotnull(d_date#22)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Input [4]: [d_date_sk#21, d_date#22, d_week_seq#23, d_year#24] + +(40) BroadcastExchange +Input [3]: [d_date_sk#21, d_date#22, d_week_seq#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(41) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#8] +Right keys [1]: [d_date_sk#21] +Join type: Inner +Join condition: None + +(42) Project [codegen id : 10] +Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date_sk#21, d_date#22, d_week_seq#23] + +(43) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#25, d_week_seq#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [plan_id=7] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [d_week_seq#23, inv_date_sk#12] +Right keys [2]: [d_week_seq#26, d_date_sk#25] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#12, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#25, d_week_seq#26] + +(49) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#27, d_date#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#27, d_date#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] + +(53) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#27] +Join type: Inner +Join condition: (d_date#28 > date_add(d_date#22, 5)) + +(54) Project [codegen id : 10] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_date#22, d_week_seq#23, d_date_sk#27, d_date#28] + +(55) Scan parquet spark_catalog.default.promotion +Output [1]: [p_promo_sk#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#29] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(59) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_promo_sk#5] +Right keys [1]: [p_promo_sk#29] +Join type: LeftOuter +Join condition: None + +(60) Project [codegen id : 10] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, p_promo_sk#29] + +(61) Exchange +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(62) Sort [codegen id : 11] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 + +(63) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(65) Filter [codegen id : 12] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) + +(66) Project [codegen id : 12] +Output [2]: [cr_item_sk#30, cr_order_number#31] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] + +(67) Exchange +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(68) Sort [codegen id : 13] +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin [codegen id : 14] +Left keys [2]: [cs_item_sk#4, cs_order_number#6] +Right keys [2]: [cr_item_sk#30, cr_order_number#31] +Join type: LeftOuter +Join condition: None + +(70) Project [codegen id : 14] +Output [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#14, i_item_desc#16, d_week_seq#23, cr_item_sk#30, cr_order_number#31] + +(71) HashAggregate [codegen id : 14] +Input [3]: [w_warehouse_name#14, i_item_desc#16, d_week_seq#23] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#33] +Results [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] + +(72) Exchange +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Arguments: hashpartitioning(i_item_desc#16, w_warehouse_name#14, d_week_seq#23, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(73) HashAggregate [codegen id : 15] +Input [4]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count#34] +Keys [3]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, count(1)#35 AS no_promo#36, count(1)#35 AS promo#37, count(1)#35 AS total_cnt#38] + +(74) TakeOrderedAndProject +Input [6]: [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] +Arguments: 100, [total_cnt#38 DESC NULLS LAST, i_item_desc#16 ASC NULLS FIRST, w_warehouse_name#14 ASC NULLS FIRST, d_week_seq#23 ASC NULLS FIRST], [i_item_desc#16, w_warehouse_name#14, d_week_seq#23, no_promo#36, promo#37, total_cnt#38] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..37b3b330ae --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.native_iceberg_compat/simplified.txt @@ -0,0 +1,114 @@ +TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] + WholeStageCodegen (15) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] + InputAdapter + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen (14) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (11) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #2 + WholeStageCodegen (10) + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_sold_date_sk,inv_warehouse_sk,inv_date_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.inventory [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand,inv_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.warehouse [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.household_demographics [hd_demo_sk,hd_buy_potential] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (7) + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk] + InputAdapter + WholeStageCodegen (13) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (12) + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/explain.txt new file mode 100644 index 0000000000..62c5a478de --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/explain.txt @@ -0,0 +1,418 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (55) + : +- * BroadcastHashJoin Inner BuildRight (54) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet spark_catalog.default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet spark_catalog.default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (61) + : +- * BroadcastHashJoin Inner BuildRight (60) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.customer (56) + : +- ReusedExchange (59) + +- ReusedExchange (62) + + +(1) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(7) BroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_year#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum#10] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#12] +Results [2]: [c_customer_id#2 AS customer_id#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#12,17,2) AS year_total#14] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#13, year_total#14] +Condition : (isnotnull(year_total#14) AND (year_total#14 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_customer_id#16)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#15] +Right keys [1]: [ss_customer_sk#19] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21] +Input [7]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18, ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] +Condition : (((isnotnull(d_year#23) AND (d_year#23 = 2002)) AND d_year#23 IN (2001,2002)) AND isnotnull(d_date_sk#22)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#22, d_year#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Input [7]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21, d_date_sk#22, d_year#23] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum#24] +Results [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] + +(33) Exchange +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Arguments: hashpartitioning(c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#20))#12] +Results [4]: [c_customer_id#16 AS customer_id#26, c_first_name#17 AS customer_first_name#27, c_last_name#18 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#20))#12,17,2) AS year_total#29] + +(35) BroadcastExchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#26] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Condition : (isnotnull(c_customer_sk#30) AND isnotnull(c_customer_id#31)) + +(40) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#36)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(42) Filter [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Condition : isnotnull(ws_bill_customer_sk#34) + +(43) BroadcastExchange +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#30] +Right keys [1]: [ws_bill_customer_sk#34] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36] +Input [7]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33, ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#37, d_year#38] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#37] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Input [7]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36, d_date_sk#37, d_year#38] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum#39] +Results [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] + +(50) Exchange +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Arguments: hashpartitioning(c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#35))#41] +Results [2]: [c_customer_id#31 AS customer_id#42, MakeDecimal(sum(UnscaledValue(ws_net_paid#35))#41,17,2) AS year_total#43] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#42, year_total#43] +Condition : (isnotnull(year_total#43) AND (year_total#43 > 0.00)) + +(53) BroadcastExchange +Input [2]: [customer_id#42, year_total#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#42] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 16] +Output [7]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43] +Input [8]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#42, year_total#43] + +(56) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] + +(58) Filter [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Condition : (isnotnull(c_customer_sk#44) AND isnotnull(c_customer_id#45)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#44] +Right keys [1]: [ws_bill_customer_sk#48] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50] +Input [7]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47, ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#51, d_year#52] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#50] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Input [7]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] + +(65) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum#53] +Results [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] + +(66) Exchange +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#49))#41] +Results [2]: [c_customer_id#45 AS customer_id#55, MakeDecimal(sum(UnscaledValue(ws_net_paid#49))#41,17,2) AS year_total#56] + +(68) BroadcastExchange +Input [2]: [customer_id#55, year_total#56] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#55] +Join type: Inner +Join condition: (CASE WHEN (year_total#43 > 0.00) THEN (year_total#56 / year_total#43) END > CASE WHEN (year_total#14 > 0.00) THEN (year_total#29 / year_total#14) END) + +(70) Project [codegen id : 16] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43, customer_id#55, year_total#56] + +(71) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_first_name#27 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_last_name#28 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8ad902e176 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_datafusion/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..62c5a478de --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/explain.txt @@ -0,0 +1,418 @@ +== Physical Plan == +TakeOrderedAndProject (71) ++- * Project (70) + +- * BroadcastHashJoin Inner BuildRight (69) + :- * Project (55) + : +- * BroadcastHashJoin Inner BuildRight (54) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet spark_catalog.default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet spark_catalog.default.date_dim (26) + : +- BroadcastExchange (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet spark_catalog.default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet spark_catalog.default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (68) + +- * HashAggregate (67) + +- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * BroadcastHashJoin Inner BuildRight (63) + :- * Project (61) + : +- * BroadcastHashJoin Inner BuildRight (60) + : :- * Filter (58) + : : +- * ColumnarToRow (57) + : : +- Scan parquet spark_catalog.default.customer (56) + : +- ReusedExchange (59) + +- ReusedExchange (62) + + +(1) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(6) Filter [codegen id : 1] +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Condition : isnotnull(ss_customer_sk#5) + +(7) BroadcastExchange +Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#8, d_year#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#9] +Condition : (((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND d_year#9 IN (2001,2002)) AND isnotnull(d_date_sk#8)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#8, d_year#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#8] +Join type: Inner +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#8, d_year#9] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#9] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum#10] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9, sum#11] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#9] +Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#12] +Results [2]: [c_customer_id#2 AS customer_id#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#12,17,2) AS year_total#14] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#13, year_total#14] +Condition : (isnotnull(year_total#14) AND (year_total#14 > 0.00)) + +(20) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_customer_id#16)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#15] +Right keys [1]: [ss_customer_sk#19] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21] +Input [7]: [c_customer_sk#15, c_customer_id#16, c_first_name#17, c_last_name#18, ss_customer_sk#19, ss_net_paid#20, ss_sold_date_sk#21] + +(26) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#22, d_year#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#22, d_year#23] +Condition : (((isnotnull(d_year#23) AND (d_year#23 = 2002)) AND d_year#23 IN (2001,2002)) AND isnotnull(d_date_sk#22)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#22, d_year#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] +Join type: Inner +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Input [7]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, ss_sold_date_sk#21, d_date_sk#22, d_year#23] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, ss_net_paid#20, d_year#23] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum#24] +Results [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] + +(33) Exchange +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Arguments: hashpartitioning(c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(34) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23, sum#25] +Keys [4]: [c_customer_id#16, c_first_name#17, c_last_name#18, d_year#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#20))#12] +Results [4]: [c_customer_id#16 AS customer_id#26, c_first_name#17 AS customer_first_name#27, c_last_name#18 AS customer_last_name#28, MakeDecimal(sum(UnscaledValue(ss_net_paid#20))#12,17,2) AS year_total#29] + +(35) BroadcastExchange +Input [4]: [customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#26] +Join type: Inner +Join condition: None + +(37) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33] +Condition : (isnotnull(c_customer_sk#30) AND isnotnull(c_customer_id#31)) + +(40) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#36)] +PushedFilters: [IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(42) Filter [codegen id : 8] +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Condition : isnotnull(ws_bill_customer_sk#34) + +(43) BroadcastExchange +Input [3]: [ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#30] +Right keys [1]: [ws_bill_customer_sk#34] +Join type: Inner +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36] +Input [7]: [c_customer_sk#30, c_customer_id#31, c_first_name#32, c_last_name#33, ws_bill_customer_sk#34, ws_net_paid#35, ws_sold_date_sk#36] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#37, d_year#38] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#37] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Input [7]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, ws_sold_date_sk#36, d_date_sk#37, d_year#38] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, ws_net_paid#35, d_year#38] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum#39] +Results [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] + +(50) Exchange +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Arguments: hashpartitioning(c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38, sum#40] +Keys [4]: [c_customer_id#31, c_first_name#32, c_last_name#33, d_year#38] +Functions [1]: [sum(UnscaledValue(ws_net_paid#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#35))#41] +Results [2]: [c_customer_id#31 AS customer_id#42, MakeDecimal(sum(UnscaledValue(ws_net_paid#35))#41,17,2) AS year_total#43] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#42, year_total#43] +Condition : (isnotnull(year_total#43) AND (year_total#43 > 0.00)) + +(53) BroadcastExchange +Input [2]: [customer_id#42, year_total#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] + +(54) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#42] +Join type: Inner +Join condition: None + +(55) Project [codegen id : 16] +Output [7]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43] +Input [8]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, customer_id#42, year_total#43] + +(56) Scan parquet spark_catalog.default.customer +Output [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] + +(58) Filter [codegen id : 14] +Input [4]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47] +Condition : (isnotnull(c_customer_sk#44) AND isnotnull(c_customer_id#45)) + +(59) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#44] +Right keys [1]: [ws_bill_customer_sk#48] +Join type: Inner +Join condition: None + +(61) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50] +Input [7]: [c_customer_sk#44, c_customer_id#45, c_first_name#46, c_last_name#47, ws_bill_customer_sk#48, ws_net_paid#49, ws_sold_date_sk#50] + +(62) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#51, d_year#52] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#50] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(64) Project [codegen id : 14] +Output [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Input [7]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, ws_sold_date_sk#50, d_date_sk#51, d_year#52] + +(65) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, ws_net_paid#49, d_year#52] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum#53] +Results [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] + +(66) Exchange +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Arguments: hashpartitioning(c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(67) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52, sum#54] +Keys [4]: [c_customer_id#45, c_first_name#46, c_last_name#47, d_year#52] +Functions [1]: [sum(UnscaledValue(ws_net_paid#49))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#49))#41] +Results [2]: [c_customer_id#45 AS customer_id#55, MakeDecimal(sum(UnscaledValue(ws_net_paid#49))#41,17,2) AS year_total#56] + +(68) BroadcastExchange +Input [2]: [customer_id#55, year_total#56] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] + +(69) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#13] +Right keys [1]: [customer_id#55] +Join type: Inner +Join condition: (CASE WHEN (year_total#43 > 0.00) THEN (year_total#56 / year_total#43) END > CASE WHEN (year_total#14 > 0.00) THEN (year_total#29 / year_total#14) END) + +(70) Project [codegen id : 16] +Output [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Input [9]: [customer_id#13, year_total#14, customer_id#26, customer_first_name#27, customer_last_name#28, year_total#29, year_total#43, customer_id#55, year_total#56] + +(71) TakeOrderedAndProject +Input [3]: [customer_id#26, customer_first_name#27, customer_last_name#28] +Arguments: 100, [customer_first_name#27 ASC NULLS FIRST, customer_id#26 ASC NULLS FIRST, customer_last_name#28 ASC NULLS FIRST], [customer_id#26, customer_first_name#27, customer_last_name#28] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8ad902e176 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.native_iceberg_compat/simplified.txt @@ -0,0 +1,106 @@ +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (11) + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/explain.txt new file mode 100644 index 0000000000..bf7bc3e171 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/explain.txt @@ -0,0 +1,754 @@ +== Physical Plan == +TakeOrderedAndProject (135) ++- * Project (134) + +- * SortMergeJoin Inner (133) + :- * Sort (74) + : +- Exchange (73) + : +- * Filter (72) + : +- * HashAggregate (71) + : +- Exchange (70) + : +- * HashAggregate (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- Union (65) + : :- * Project (26) + : : +- * SortMergeJoin LeftOuter (25) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.date_dim (11) + : : +- * Sort (24) + : : +- Exchange (23) + : : +- * Project (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : :- * Project (45) + : : +- * SortMergeJoin LeftOuter (44) + : : :- * Sort (37) + : : : +- Exchange (36) + : : : +- * Project (35) + : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : :- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Filter (29) + : : : : : +- * ColumnarToRow (28) + : : : : : +- Scan parquet spark_catalog.default.store_sales (27) + : : : : +- ReusedExchange (30) + : : : +- ReusedExchange (33) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet spark_catalog.default.store_returns (38) + : +- * Project (64) + : +- * SortMergeJoin LeftOuter (63) + : :- * Sort (56) + : : +- Exchange (55) + : : +- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet spark_catalog.default.web_sales (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (52) + : +- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * Filter (59) + : +- * ColumnarToRow (58) + : +- Scan parquet spark_catalog.default.web_returns (57) + +- * Sort (132) + +- Exchange (131) + +- * Filter (130) + +- * HashAggregate (129) + +- Exchange (128) + +- * HashAggregate (127) + +- * HashAggregate (126) + +- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (92) + : +- * SortMergeJoin LeftOuter (91) + : :- * Sort (88) + : : +- Exchange (87) + : : +- * Project (86) + : : +- * BroadcastHashJoin Inner BuildRight (85) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (75) + : : : +- ReusedExchange (78) + : : +- BroadcastExchange (84) + : : +- * Filter (83) + : : +- * ColumnarToRow (82) + : : +- Scan parquet spark_catalog.default.date_dim (81) + : +- * Sort (90) + : +- ReusedExchange (89) + :- * Project (107) + : +- * SortMergeJoin LeftOuter (106) + : :- * Sort (103) + : : +- Exchange (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- * Filter (95) + : : : : +- * ColumnarToRow (94) + : : : : +- Scan parquet spark_catalog.default.store_sales (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Sort (105) + : +- ReusedExchange (104) + +- * Project (122) + +- * SortMergeJoin LeftOuter (121) + :- * Sort (118) + : +- Exchange (117) + : +- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Project (113) + : : +- * BroadcastHashJoin Inner BuildRight (112) + : : :- * Filter (110) + : : : +- * ColumnarToRow (109) + : : : +- Scan parquet spark_catalog.default.web_sales (108) + : : +- ReusedExchange (111) + : +- ReusedExchange (114) + +- * Sort (120) + +- ReusedExchange (119) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books ), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] + +(17) Exchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 4] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(21) Filter [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(22) Project [codegen id : 5] +Output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(23) Exchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 6] +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 7] +Left keys [2]: [cs_order_number#2, cs_item_sk#1] +Right keys [2]: [cr_order_number#15, cr_item_sk#14] +Join type: LeftOuter +Join condition: None + +(26) Project [codegen id : 7] +Output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(27) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#25)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] + +(29) Filter [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : isnotnull(ss_item_sk#21) + +(30) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_item_sk#21] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Input [10]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(33) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#31, d_year#32] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#31] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Input [11]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_date_sk#31, d_year#32] + +(36) Exchange +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: hashpartitioning(ss_ticket_number#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(37) Sort [codegen id : 11] +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST], false, 0 + +(38) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(40) Filter [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Condition : (isnotnull(sr_ticket_number#34) AND isnotnull(sr_item_sk#33)) + +(41) Project [codegen id : 12] +Output [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(42) Exchange +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: hashpartitioning(sr_ticket_number#34, sr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(43) Sort [codegen id : 13] +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [sr_ticket_number#34 ASC NULLS FIRST, sr_item_sk#33 ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin [codegen id : 14] +Left keys [2]: [ss_ticket_number#22, ss_item_sk#21] +Right keys [2]: [sr_ticket_number#34, sr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(45) Project [codegen id : 14] +Output [7]: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, (ss_quantity#23 - coalesce(sr_return_quantity#35, 0)) AS sales_cnt#38, (ss_ext_sales_price#24 - coalesce(sr_return_amt#36, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32, sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] + +(46) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#44)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] + +(48) Filter [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_item_sk#40) + +(49) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(50) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#40] +Right keys [1]: [i_item_sk#45] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Input [10]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(52) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#50, d_year#51] + +(53) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#44] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Input [11]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_date_sk#50, d_year#51] + +(55) Exchange +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: hashpartitioning(ws_order_number#41, ws_item_sk#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(56) Sort [codegen id : 18] +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: [ws_order_number#41 ASC NULLS FIRST, ws_item_sk#40 ASC NULLS FIRST], false, 0 + +(57) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(59) Filter [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Condition : (isnotnull(wr_order_number#53) AND isnotnull(wr_item_sk#52)) + +(60) Project [codegen id : 19] +Output [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(61) Exchange +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: hashpartitioning(wr_order_number#53, wr_item_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(62) Sort [codegen id : 20] +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [wr_order_number#53 ASC NULLS FIRST, wr_item_sk#52 ASC NULLS FIRST], false, 0 + +(63) SortMergeJoin [codegen id : 21] +Left keys [2]: [ws_order_number#41, ws_item_sk#40] +Right keys [2]: [wr_order_number#53, wr_item_sk#52] +Join type: LeftOuter +Join condition: None + +(64) Project [codegen id : 21] +Output [7]: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, (ws_quantity#42 - coalesce(wr_return_quantity#54, 0)) AS sales_cnt#57, (ws_ext_sales_price#43 - coalesce(wr_return_amt#55, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51, wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] + +(65) Union + +(66) HashAggregate [codegen id : 22] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(67) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(68) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(69) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#60] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] + +(70) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(71) HashAggregate [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(sales_cnt#19)#63 AS sales_cnt#65, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#66] + +(72) Filter [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Condition : isnotnull(sales_cnt#65) + +(73) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) Sort [codegen id : 25] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(75) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#71)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] + +(77) Filter [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Condition : isnotnull(cs_item_sk#67) + +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(79) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Input [10]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(81) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#77, d_year#78] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] + +(83) Filter [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] +Condition : ((isnotnull(d_year#78) AND (d_year#78 = 2001)) AND isnotnull(d_date_sk#77)) + +(84) BroadcastExchange +Input [2]: [d_date_sk#77, d_year#78] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(85) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_sold_date_sk#71] +Right keys [1]: [d_date_sk#77] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Input [11]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_date_sk#77, d_year#78] + +(87) Exchange +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: hashpartitioning(cs_order_number#68, cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(88) Sort [codegen id : 29] +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: [cs_order_number#68 ASC NULLS FIRST, cs_item_sk#67 ASC NULLS FIRST], false, 0 + +(89) ReusedExchange [Reuses operator id: 23] +Output [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(90) Sort [codegen id : 31] +Input [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] +Arguments: [cr_order_number#80 ASC NULLS FIRST, cr_item_sk#79 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin [codegen id : 32] +Left keys [2]: [cs_order_number#68, cs_item_sk#67] +Right keys [2]: [cr_order_number#80, cr_item_sk#79] +Join type: LeftOuter +Join condition: None + +(92) Project [codegen id : 32] +Output [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, (cs_quantity#69 - coalesce(cr_return_quantity#81, 0)) AS sales_cnt#19, (cs_ext_sales_price#70 - coalesce(cr_return_amount#82, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78, cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(93) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#87)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] + +(95) Filter [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Condition : isnotnull(ss_item_sk#83) + +(96) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(97) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_item_sk#83] +Right keys [1]: [i_item_sk#88] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] +Input [10]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(99) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#93, d_year#94] + +(100) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_sold_date_sk#87] +Right keys [1]: [d_date_sk#93] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Input [11]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_date_sk#93, d_year#94] + +(102) Exchange +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: hashpartitioning(ss_ticket_number#84, ss_item_sk#83, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(103) Sort [codegen id : 36] +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: [ss_ticket_number#84 ASC NULLS FIRST, ss_item_sk#83 ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 42] +Output [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(105) Sort [codegen id : 38] +Input [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] +Arguments: [sr_ticket_number#96 ASC NULLS FIRST, sr_item_sk#95 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin [codegen id : 39] +Left keys [2]: [ss_ticket_number#84, ss_item_sk#83] +Right keys [2]: [sr_ticket_number#96, sr_item_sk#95] +Join type: LeftOuter +Join condition: None + +(107) Project [codegen id : 39] +Output [7]: [d_year#94, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, (ss_quantity#85 - coalesce(sr_return_quantity#97, 0)) AS sales_cnt#38, (ss_ext_sales_price#86 - coalesce(sr_return_amt#98, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94, sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(108) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#103)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(109) ColumnarToRow [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] + +(110) Filter [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Condition : isnotnull(ws_item_sk#99) + +(111) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(112) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#99] +Right keys [1]: [i_item_sk#104] +Join type: Inner +Join condition: None + +(113) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] +Input [10]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(114) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#109, d_year#110] + +(115) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#103] +Right keys [1]: [d_date_sk#109] +Join type: Inner +Join condition: None + +(116) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Input [11]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_date_sk#109, d_year#110] + +(117) Exchange +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: hashpartitioning(ws_order_number#100, ws_item_sk#99, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(118) Sort [codegen id : 43] +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: [ws_order_number#100 ASC NULLS FIRST, ws_item_sk#99 ASC NULLS FIRST], false, 0 + +(119) ReusedExchange [Reuses operator id: 61] +Output [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(120) Sort [codegen id : 45] +Input [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] +Arguments: [wr_order_number#112 ASC NULLS FIRST, wr_item_sk#111 ASC NULLS FIRST], false, 0 + +(121) SortMergeJoin [codegen id : 46] +Left keys [2]: [ws_order_number#100, ws_item_sk#99] +Right keys [2]: [wr_order_number#112, wr_item_sk#111] +Join type: LeftOuter +Join condition: None + +(122) Project [codegen id : 46] +Output [7]: [d_year#110, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, (ws_quantity#101 - coalesce(wr_return_quantity#113, 0)) AS sales_cnt#57, (ws_ext_sales_price#102 - coalesce(wr_return_amt#114, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110, wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(123) Union + +(124) HashAggregate [codegen id : 47] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(125) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(126) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(127) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#115] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] + +(128) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(129) HashAggregate [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum(sales_cnt#19)#63 AS sales_cnt#117, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#118] + +(130) Filter [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Condition : isnotnull(sales_cnt#117) + +(131) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: hashpartitioning(i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(132) Sort [codegen id : 50] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: [i_brand_id#73 ASC NULLS FIRST, i_class_id#74 ASC NULLS FIRST, i_category_id#75 ASC NULLS FIRST, i_manufact_id#76 ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin [codegen id : 51] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Join type: Inner +Join condition: ((cast(sales_cnt#65 as decimal(17,2)) / cast(sales_cnt#117 as decimal(17,2))) < 0.90000000000000000000) + +(134) Project [codegen id : 51] +Output [10]: [d_year#78 AS prev_year#119, d_year#13 AS year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#117 AS prev_yr_cnt#121, sales_cnt#65 AS curr_yr_cnt#122, (sales_cnt#65 - sales_cnt#117) AS sales_cnt_diff#123, (sales_amt#66 - sales_amt#118) AS sales_amt_diff#124] +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66, d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] + +(135) TakeOrderedAndProject +Input [10]: [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] +Arguments: 100, [sales_cnt_diff#123 ASC NULLS FIRST, sales_amt_diff#124 ASC NULLS FIRST], [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/simplified.txt new file mode 100644 index 0000000000..8fd51946e0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_datafusion/simplified.txt @@ -0,0 +1,232 @@ +TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt] + WholeStageCodegen (51) + Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (25) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + WholeStageCodegen (24) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + WholeStageCodegen (23) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + WholeStageCodegen (22) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + WholeStageCodegen (21) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (20) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (50) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (38) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (43) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (45) + Sort [wr_order_number,wr_item_sk] + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..bf7bc3e171 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/explain.txt @@ -0,0 +1,754 @@ +== Physical Plan == +TakeOrderedAndProject (135) ++- * Project (134) + +- * SortMergeJoin Inner (133) + :- * Sort (74) + : +- Exchange (73) + : +- * Filter (72) + : +- * HashAggregate (71) + : +- Exchange (70) + : +- * HashAggregate (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- Union (65) + : :- * Project (26) + : : +- * SortMergeJoin LeftOuter (25) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.date_dim (11) + : : +- * Sort (24) + : : +- Exchange (23) + : : +- * Project (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet spark_catalog.default.catalog_returns (19) + : :- * Project (45) + : : +- * SortMergeJoin LeftOuter (44) + : : :- * Sort (37) + : : : +- Exchange (36) + : : : +- * Project (35) + : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : :- * Project (32) + : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : :- * Filter (29) + : : : : : +- * ColumnarToRow (28) + : : : : : +- Scan parquet spark_catalog.default.store_sales (27) + : : : : +- ReusedExchange (30) + : : : +- ReusedExchange (33) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet spark_catalog.default.store_returns (38) + : +- * Project (64) + : +- * SortMergeJoin LeftOuter (63) + : :- * Sort (56) + : : +- Exchange (55) + : : +- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (51) + : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : :- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet spark_catalog.default.web_sales (46) + : : : +- ReusedExchange (49) + : : +- ReusedExchange (52) + : +- * Sort (62) + : +- Exchange (61) + : +- * Project (60) + : +- * Filter (59) + : +- * ColumnarToRow (58) + : +- Scan parquet spark_catalog.default.web_returns (57) + +- * Sort (132) + +- Exchange (131) + +- * Filter (130) + +- * HashAggregate (129) + +- Exchange (128) + +- * HashAggregate (127) + +- * HashAggregate (126) + +- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (92) + : +- * SortMergeJoin LeftOuter (91) + : :- * Sort (88) + : : +- Exchange (87) + : : +- * Project (86) + : : +- * BroadcastHashJoin Inner BuildRight (85) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (75) + : : : +- ReusedExchange (78) + : : +- BroadcastExchange (84) + : : +- * Filter (83) + : : +- * ColumnarToRow (82) + : : +- Scan parquet spark_catalog.default.date_dim (81) + : +- * Sort (90) + : +- ReusedExchange (89) + :- * Project (107) + : +- * SortMergeJoin LeftOuter (106) + : :- * Sort (103) + : : +- Exchange (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- * Filter (95) + : : : : +- * ColumnarToRow (94) + : : : : +- Scan parquet spark_catalog.default.store_sales (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Sort (105) + : +- ReusedExchange (104) + +- * Project (122) + +- * SortMergeJoin LeftOuter (121) + :- * Sort (118) + : +- Exchange (117) + : +- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Project (113) + : : +- * BroadcastHashJoin Inner BuildRight (112) + : : :- * Filter (110) + : : : +- * ColumnarToRow (109) + : : : +- Scan parquet spark_catalog.default.web_sales (108) + : : +- ReusedExchange (111) + : +- ReusedExchange (114) + +- * Sort (120) + +- ReusedExchange (119) + + +(1) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#5)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] + +(3) Filter [codegen id : 3] +Input [5]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books ), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books )) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#12, d_year#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2002)) AND isnotnull(d_date_sk#12)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#12] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#12, d_year#13] + +(17) Exchange +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(18) Sort [codegen id : 4] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13] +Arguments: [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST], false, 0 + +(19) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(21) Filter [codegen id : 5] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] +Condition : (isnotnull(cr_order_number#15) AND isnotnull(cr_item_sk#14)) + +(22) Project [codegen id : 5] +Output [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Input [5]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17, cr_returned_date_sk#18] + +(23) Exchange +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: hashpartitioning(cr_order_number#15, cr_item_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) Sort [codegen id : 6] +Input [4]: [cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] +Arguments: [cr_order_number#15 ASC NULLS FIRST, cr_item_sk#14 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin [codegen id : 7] +Left keys [2]: [cs_order_number#2, cs_item_sk#1] +Right keys [2]: [cr_order_number#15, cr_item_sk#14] +Join type: LeftOuter +Join condition: None + +(26) Project [codegen id : 7] +Output [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#3 - coalesce(cr_return_quantity#16, 0)) AS sales_cnt#19, (cs_ext_sales_price#4 - coalesce(cr_return_amount#17, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#13, cr_item_sk#14, cr_order_number#15, cr_return_quantity#16, cr_return_amount#17] + +(27) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#25)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] + +(29) Filter [codegen id : 10] +Input [5]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : isnotnull(ss_item_sk#21) + +(30) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_item_sk#21] +Right keys [1]: [i_item_sk#26] +Join type: Inner +Join condition: None + +(32) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] +Input [10]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_item_sk#26, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30] + +(33) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#31, d_year#32] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#31] +Join type: Inner +Join condition: None + +(35) Project [codegen id : 10] +Output [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Input [11]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, ss_sold_date_sk#25, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_date_sk#31, d_year#32] + +(36) Exchange +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: hashpartitioning(ss_ticket_number#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(37) Sort [codegen id : 11] +Input [9]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32] +Arguments: [ss_ticket_number#22 ASC NULLS FIRST, ss_item_sk#21 ASC NULLS FIRST], false, 0 + +(38) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(40) Filter [codegen id : 12] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] +Condition : (isnotnull(sr_ticket_number#34) AND isnotnull(sr_item_sk#33)) + +(41) Project [codegen id : 12] +Output [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Input [5]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36, sr_returned_date_sk#37] + +(42) Exchange +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: hashpartitioning(sr_ticket_number#34, sr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(43) Sort [codegen id : 13] +Input [4]: [sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] +Arguments: [sr_ticket_number#34 ASC NULLS FIRST, sr_item_sk#33 ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin [codegen id : 14] +Left keys [2]: [ss_ticket_number#22, ss_item_sk#21] +Right keys [2]: [sr_ticket_number#34, sr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(45) Project [codegen id : 14] +Output [7]: [d_year#32, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, (ss_quantity#23 - coalesce(sr_return_quantity#35, 0)) AS sales_cnt#38, (ss_ext_sales_price#24 - coalesce(sr_return_amt#36, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#21, ss_ticket_number#22, ss_quantity#23, ss_ext_sales_price#24, i_brand_id#27, i_class_id#28, i_category_id#29, i_manufact_id#30, d_year#32, sr_item_sk#33, sr_ticket_number#34, sr_return_quantity#35, sr_return_amt#36] + +(46) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#44)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] + +(48) Filter [codegen id : 17] +Input [5]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_item_sk#40) + +(49) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(50) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#40] +Right keys [1]: [i_item_sk#45] +Join type: Inner +Join condition: None + +(51) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] +Input [10]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_item_sk#45, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49] + +(52) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#50, d_year#51] + +(53) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#44] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(54) Project [codegen id : 17] +Output [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Input [11]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, ws_sold_date_sk#44, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_date_sk#50, d_year#51] + +(55) Exchange +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: hashpartitioning(ws_order_number#41, ws_item_sk#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(56) Sort [codegen id : 18] +Input [9]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51] +Arguments: [ws_order_number#41 ASC NULLS FIRST, ws_item_sk#40 ASC NULLS FIRST], false, 0 + +(57) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(59) Filter [codegen id : 19] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] +Condition : (isnotnull(wr_order_number#53) AND isnotnull(wr_item_sk#52)) + +(60) Project [codegen id : 19] +Output [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Input [5]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55, wr_returned_date_sk#56] + +(61) Exchange +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: hashpartitioning(wr_order_number#53, wr_item_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(62) Sort [codegen id : 20] +Input [4]: [wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] +Arguments: [wr_order_number#53 ASC NULLS FIRST, wr_item_sk#52 ASC NULLS FIRST], false, 0 + +(63) SortMergeJoin [codegen id : 21] +Left keys [2]: [ws_order_number#41, ws_item_sk#40] +Right keys [2]: [wr_order_number#53, wr_item_sk#52] +Join type: LeftOuter +Join condition: None + +(64) Project [codegen id : 21] +Output [7]: [d_year#51, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, (ws_quantity#42 - coalesce(wr_return_quantity#54, 0)) AS sales_cnt#57, (ws_ext_sales_price#43 - coalesce(wr_return_amt#55, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#40, ws_order_number#41, ws_quantity#42, ws_ext_sales_price#43, i_brand_id#46, i_class_id#47, i_category_id#48, i_manufact_id#49, d_year#51, wr_item_sk#52, wr_order_number#53, wr_return_quantity#54, wr_return_amt#55] + +(65) Union + +(66) HashAggregate [codegen id : 22] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(67) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(68) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] + +(69) HashAggregate [codegen id : 23] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#60] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] + +(70) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Arguments: hashpartitioning(d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(71) HashAggregate [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#61, sum#62] +Keys [5]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(sales_cnt#19)#63 AS sales_cnt#65, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#66] + +(72) Filter [codegen id : 24] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Condition : isnotnull(sales_cnt#65) + +(73) Exchange +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) Sort [codegen id : 25] +Input [7]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66] +Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 + +(75) Scan parquet spark_catalog.default.catalog_sales +Output [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#71)] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] + +(77) Filter [codegen id : 28] +Input [5]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71] +Condition : isnotnull(cs_item_sk#67) + +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(79) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_item_sk#67] +Right keys [1]: [i_item_sk#72] +Join type: Inner +Join condition: None + +(80) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Input [10]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_item_sk#72, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] + +(81) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#77, d_year#78] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] + +(83) Filter [codegen id : 27] +Input [2]: [d_date_sk#77, d_year#78] +Condition : ((isnotnull(d_year#78) AND (d_year#78 = 2001)) AND isnotnull(d_date_sk#77)) + +(84) BroadcastExchange +Input [2]: [d_date_sk#77, d_year#78] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] + +(85) BroadcastHashJoin [codegen id : 28] +Left keys [1]: [cs_sold_date_sk#71] +Right keys [1]: [d_date_sk#77] +Join type: Inner +Join condition: None + +(86) Project [codegen id : 28] +Output [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Input [11]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, cs_sold_date_sk#71, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_date_sk#77, d_year#78] + +(87) Exchange +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: hashpartitioning(cs_order_number#68, cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(88) Sort [codegen id : 29] +Input [9]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78] +Arguments: [cs_order_number#68 ASC NULLS FIRST, cs_item_sk#67 ASC NULLS FIRST], false, 0 + +(89) ReusedExchange [Reuses operator id: 23] +Output [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(90) Sort [codegen id : 31] +Input [4]: [cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] +Arguments: [cr_order_number#80 ASC NULLS FIRST, cr_item_sk#79 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin [codegen id : 32] +Left keys [2]: [cs_order_number#68, cs_item_sk#67] +Right keys [2]: [cr_order_number#80, cr_item_sk#79] +Join type: LeftOuter +Join condition: None + +(92) Project [codegen id : 32] +Output [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, (cs_quantity#69 - coalesce(cr_return_quantity#81, 0)) AS sales_cnt#19, (cs_ext_sales_price#70 - coalesce(cr_return_amount#82, 0.00)) AS sales_amt#20] +Input [13]: [cs_item_sk#67, cs_order_number#68, cs_quantity#69, cs_ext_sales_price#70, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, d_year#78, cr_item_sk#79, cr_order_number#80, cr_return_quantity#81, cr_return_amount#82] + +(93) Scan parquet spark_catalog.default.store_sales +Output [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#87)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(94) ColumnarToRow [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] + +(95) Filter [codegen id : 35] +Input [5]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87] +Condition : isnotnull(ss_item_sk#83) + +(96) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(97) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_item_sk#83] +Right keys [1]: [i_item_sk#88] +Join type: Inner +Join condition: None + +(98) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] +Input [10]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92] + +(99) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#93, d_year#94] + +(100) BroadcastHashJoin [codegen id : 35] +Left keys [1]: [ss_sold_date_sk#87] +Right keys [1]: [d_date_sk#93] +Join type: Inner +Join condition: None + +(101) Project [codegen id : 35] +Output [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Input [11]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, ss_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_date_sk#93, d_year#94] + +(102) Exchange +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: hashpartitioning(ss_ticket_number#84, ss_item_sk#83, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(103) Sort [codegen id : 36] +Input [9]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94] +Arguments: [ss_ticket_number#84 ASC NULLS FIRST, ss_item_sk#83 ASC NULLS FIRST], false, 0 + +(104) ReusedExchange [Reuses operator id: 42] +Output [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(105) Sort [codegen id : 38] +Input [4]: [sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] +Arguments: [sr_ticket_number#96 ASC NULLS FIRST, sr_item_sk#95 ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin [codegen id : 39] +Left keys [2]: [ss_ticket_number#84, ss_item_sk#83] +Right keys [2]: [sr_ticket_number#96, sr_item_sk#95] +Join type: LeftOuter +Join condition: None + +(107) Project [codegen id : 39] +Output [7]: [d_year#94, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, (ss_quantity#85 - coalesce(sr_return_quantity#97, 0)) AS sales_cnt#38, (ss_ext_sales_price#86 - coalesce(sr_return_amt#98, 0.00)) AS sales_amt#39] +Input [13]: [ss_item_sk#83, ss_ticket_number#84, ss_quantity#85, ss_ext_sales_price#86, i_brand_id#89, i_class_id#90, i_category_id#91, i_manufact_id#92, d_year#94, sr_item_sk#95, sr_ticket_number#96, sr_return_quantity#97, sr_return_amt#98] + +(108) Scan parquet spark_catalog.default.web_sales +Output [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#103)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(109) ColumnarToRow [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] + +(110) Filter [codegen id : 42] +Input [5]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103] +Condition : isnotnull(ws_item_sk#99) + +(111) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(112) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#99] +Right keys [1]: [i_item_sk#104] +Join type: Inner +Join condition: None + +(113) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] +Input [10]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_item_sk#104, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108] + +(114) ReusedExchange [Reuses operator id: 84] +Output [2]: [d_date_sk#109, d_year#110] + +(115) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#103] +Right keys [1]: [d_date_sk#109] +Join type: Inner +Join condition: None + +(116) Project [codegen id : 42] +Output [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Input [11]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, ws_sold_date_sk#103, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_date_sk#109, d_year#110] + +(117) Exchange +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: hashpartitioning(ws_order_number#100, ws_item_sk#99, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(118) Sort [codegen id : 43] +Input [9]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110] +Arguments: [ws_order_number#100 ASC NULLS FIRST, ws_item_sk#99 ASC NULLS FIRST], false, 0 + +(119) ReusedExchange [Reuses operator id: 61] +Output [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(120) Sort [codegen id : 45] +Input [4]: [wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] +Arguments: [wr_order_number#112 ASC NULLS FIRST, wr_item_sk#111 ASC NULLS FIRST], false, 0 + +(121) SortMergeJoin [codegen id : 46] +Left keys [2]: [ws_order_number#100, ws_item_sk#99] +Right keys [2]: [wr_order_number#112, wr_item_sk#111] +Join type: LeftOuter +Join condition: None + +(122) Project [codegen id : 46] +Output [7]: [d_year#110, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, (ws_quantity#101 - coalesce(wr_return_quantity#113, 0)) AS sales_cnt#57, (ws_ext_sales_price#102 - coalesce(wr_return_amt#114, 0.00)) AS sales_amt#58] +Input [13]: [ws_item_sk#99, ws_order_number#100, ws_quantity#101, ws_ext_sales_price#102, i_brand_id#105, i_class_id#106, i_category_id#107, i_manufact_id#108, d_year#110, wr_item_sk#111, wr_order_number#112, wr_return_quantity#113, wr_return_amt#114] + +(123) Union + +(124) HashAggregate [codegen id : 47] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(125) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(126) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] + +(127) HashAggregate [codegen id : 48] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#19, sales_amt#20] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [partial_sum(sales_cnt#19), partial_sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum#59, sum#115] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] + +(128) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Arguments: hashpartitioning(d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(129) HashAggregate [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum#61, sum#116] +Keys [5]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Functions [2]: [sum(sales_cnt#19), sum(UnscaledValue(sales_amt#20))] +Aggregate Attributes [2]: [sum(sales_cnt#19)#63, sum(UnscaledValue(sales_amt#20))#64] +Results [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sum(sales_cnt#19)#63 AS sales_cnt#117, MakeDecimal(sum(UnscaledValue(sales_amt#20))#64,18,2) AS sales_amt#118] + +(130) Filter [codegen id : 49] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Condition : isnotnull(sales_cnt#117) + +(131) Exchange +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: hashpartitioning(i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=18] + +(132) Sort [codegen id : 50] +Input [7]: [d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] +Arguments: [i_brand_id#73 ASC NULLS FIRST, i_class_id#74 ASC NULLS FIRST, i_category_id#75 ASC NULLS FIRST, i_manufact_id#76 ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin [codegen id : 51] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76] +Join type: Inner +Join condition: ((cast(sales_cnt#65 as decimal(17,2)) / cast(sales_cnt#117 as decimal(17,2))) < 0.90000000000000000000) + +(134) Project [codegen id : 51] +Output [10]: [d_year#78 AS prev_year#119, d_year#13 AS year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#117 AS prev_yr_cnt#121, sales_cnt#65 AS curr_yr_cnt#122, (sales_cnt#65 - sales_cnt#117) AS sales_cnt_diff#123, (sales_amt#66 - sales_amt#118) AS sales_amt_diff#124] +Input [14]: [d_year#13, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#65, sales_amt#66, d_year#78, i_brand_id#73, i_class_id#74, i_category_id#75, i_manufact_id#76, sales_cnt#117, sales_amt#118] + +(135) TakeOrderedAndProject +Input [10]: [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] +Arguments: 100, [sales_cnt_diff#123 ASC NULLS FIRST, sales_amt_diff#124 ASC NULLS FIRST], [prev_year#119, year#120, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#121, curr_yr_cnt#122, sales_cnt_diff#123, sales_amt_diff#124] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..8fd51946e0 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.native_iceberg_compat/simplified.txt @@ -0,0 +1,232 @@ +TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt] + WholeStageCodegen (51) + Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] + SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] + InputAdapter + WholeStageCodegen (25) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + WholeStageCodegen (24) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 + WholeStageCodegen (23) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + WholeStageCodegen (22) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount,cr_returned_date_sk] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt,sr_returned_date_sk] + WholeStageCodegen (21) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + InputAdapter + WholeStageCodegen (20) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt,wr_returned_date_sk] + InputAdapter + WholeStageCodegen (50) + Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) + Filter [sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(sales_cnt),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + InputAdapter + Union + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,cs_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (38) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (43) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + InputAdapter + WholeStageCodegen (45) + Sort [wr_order_number,wr_item_sk] + InputAdapter + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/explain.txt new file mode 100644 index 0000000000..8f0ae3b3f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/explain.txt @@ -0,0 +1,612 @@ +== Physical Plan == +TakeOrderedAndProject (102) ++- * HashAggregate (101) + +- Exchange (100) + +- * HashAggregate (99) + +- Union (98) + :- * HashAggregate (87) + : +- Exchange (86) + : +- * HashAggregate (85) + : +- Union (84) + : :- * Project (34) + : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : :- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.store (11) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.store_returns (20) + : : : +- ReusedExchange (23) + : : +- ReusedExchange (26) + : :- * Project (53) + : : +- * BroadcastNestedLoopJoin Inner BuildLeft (52) + : : :- BroadcastExchange (43) + : : : +- * HashAggregate (42) + : : : +- Exchange (41) + : : : +- * HashAggregate (40) + : : : +- * Project (39) + : : : +- * BroadcastHashJoin Inner BuildRight (38) + : : : :- * ColumnarToRow (36) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (35) + : : : +- ReusedExchange (37) + : : +- * HashAggregate (51) + : : +- Exchange (50) + : : +- * HashAggregate (49) + : : +- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * ColumnarToRow (45) + : : : +- Scan parquet spark_catalog.default.catalog_returns (44) + : : +- ReusedExchange (46) + : +- * Project (83) + : +- * BroadcastHashJoin LeftOuter BuildRight (82) + : :- * HashAggregate (68) + : : +- Exchange (67) + : : +- * HashAggregate (66) + : : +- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : : +- ReusedExchange (57) + : : +- BroadcastExchange (63) + : : +- * Filter (62) + : : +- * ColumnarToRow (61) + : : +- Scan parquet spark_catalog.default.web_page (60) + : +- BroadcastExchange (81) + : +- * HashAggregate (80) + : +- Exchange (79) + : +- * HashAggregate (78) + : +- * Project (77) + : +- * BroadcastHashJoin Inner BuildRight (76) + : :- * Project (74) + : : +- * BroadcastHashJoin Inner BuildRight (73) + : : :- * Filter (71) + : : : +- * ColumnarToRow (70) + : : : +- Scan parquet spark_catalog.default.web_returns (69) + : : +- ReusedExchange (72) + : +- ReusedExchange (75) + :- * HashAggregate (92) + : +- Exchange (91) + : +- * HashAggregate (90) + : +- * HashAggregate (89) + : +- ReusedExchange (88) + +- * HashAggregate (97) + +- Exchange (96) + +- * HashAggregate (95) + +- * HashAggregate (94) + +- ReusedExchange (93) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 1998-08-04)) AND (d_date#6 <= 1998-09-03)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#7] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(14) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] + +(18) Exchange +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] + +(20) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#19)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] + +(22) Filter [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#20] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] + +(26) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#21] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] + +(30) Exchange +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] + +(32) BroadcastExchange +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#7] +Right keys [1]: [s_store_sk#21] +Join type: LeftOuter +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [store channel AS channel#30, s_store_sk#7 AS id#31, sales#14, coalesce(returns#28, 0.00) AS returns#32, (profit#15 - coalesce(profit_loss#29, 0.00)) AS profit#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] + +(35) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#37)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] + +(37) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#38] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] + +(40) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] + +(41) Exchange +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(42) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] + +(43) BroadcastExchange +Input [3]: [cs_call_center_sk#34, sales#45, profit#46] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(44) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#49)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 13] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] + +(46) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#50] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 13] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] + +(49) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#47, cr_net_loss#48] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] + +(50) Exchange +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate +Input [2]: [sum#53, sum#54] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] + +(52) BroadcastNestedLoopJoin [codegen id : 14] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 14] +Output [5]: [catalog channel AS channel#59, cs_call_center_sk#34 AS id#60, sales#45, returns#57, (profit#46 - profit_loss#58) AS profit#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#65)] +PushedFilters: [IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] + +(56) Filter [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) + +(57) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#66] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] + +(60) Scan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [1]: [wp_web_page_sk#67] + +(62) Filter [codegen id : 16] +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) + +(63) BroadcastExchange +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(64) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 17] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] + +(66) HashAggregate [codegen id : 17] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] + +(67) Exchange +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] + +(69) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#79)] +PushedFilters: [IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] + +(71) Filter [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) + +(72) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#80] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] +Join type: Inner +Join condition: None + +(74) Project [codegen id : 20] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] + +(75) ReusedExchange [Reuses operator id: 63] +Output [1]: [wp_web_page_sk#81] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] +Join type: Inner +Join condition: None + +(77) Project [codegen id : 20] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] + +(78) HashAggregate [codegen id : 20] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] + +(79) Exchange +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] + +(81) BroadcastExchange +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(82) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] +Join type: LeftOuter +Join condition: None + +(83) Project [codegen id : 22] +Output [5]: [web channel AS channel#90, wp_web_page_sk#67 AS id#91, sales#74, coalesce(returns#88, 0.00) AS returns#92, (profit#75 - coalesce(profit_loss#89, 0.00)) AS profit#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] + +(84) Union + +(85) HashAggregate [codegen id : 23] +Input [5]: [channel#30, id#31, sales#14, returns#32, profit#33] +Keys [2]: [channel#30, id#31] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#32), partial_sum(profit#33)] +Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99] +Results [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(86) Exchange +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(channel#30, id#31, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(87) HashAggregate [codegen id : 24] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [5]: [channel#30, id#31, cast(sum(sales#14)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(37,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111] + +(88) ReusedExchange [Reuses operator id: 86] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(89) HashAggregate [codegen id : 48] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [4]: [channel#30, sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] + +(90) HashAggregate [codegen id : 48] +Input [4]: [channel#30, sales#112, returns#113, profit#114] +Keys [1]: [channel#30] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(91) Exchange +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#30, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(92) HashAggregate [codegen id : 49] +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [1]: [channel#30] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#127, sum(returns#113)#128, sum(profit#114)#129] +Results [5]: [channel#30, null AS id#130, sum(sales#112)#127 AS sales#131, sum(returns#113)#128 AS returns#132, sum(profit#114)#129 AS profit#133] + +(93) ReusedExchange [Reuses operator id: 86] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(94) HashAggregate [codegen id : 73] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [3]: [sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] + +(95) HashAggregate [codegen id : 73] +Input [3]: [sales#112, returns#113, profit#114] +Keys: [] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Results [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] + +(96) Exchange +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(97) HashAggregate [codegen id : 74] +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Keys: [] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#146, sum(returns#113)#147, sum(profit#114)#148] +Results [5]: [null AS channel#149, null AS id#150, sum(sales#112)#146 AS sales#151, sum(returns#113)#147 AS returns#152, sum(profit#114)#148 AS profit#153] + +(98) Union + +(99) HashAggregate [codegen id : 75] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] + +(100) Exchange +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: hashpartitioning(channel#30, id#31, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(101) HashAggregate [codegen id : 76] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] + +(102) TakeOrderedAndProject +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#109, returns#110, profit#111] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..565a880398 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_datafusion/simplified.txt @@ -0,0 +1,161 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (76) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (75) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (24) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (23) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (8) + Project [s_store_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #3 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #7 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [s_store_sk] #5 + WholeStageCodegen (14) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #9 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (22) + Project [wp_web_page_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #11 + WholeStageCodegen (17) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (16) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #14 + WholeStageCodegen (20) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [wp_web_page_sk] #12 + WholeStageCodegen (49) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (48) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (74) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #16 + WholeStageCodegen (73) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..8f0ae3b3f2 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/explain.txt @@ -0,0 +1,612 @@ +== Physical Plan == +TakeOrderedAndProject (102) ++- * HashAggregate (101) + +- Exchange (100) + +- * HashAggregate (99) + +- Union (98) + :- * HashAggregate (87) + : +- Exchange (86) + : +- * HashAggregate (85) + : +- Union (84) + : :- * Project (34) + : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : :- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet spark_catalog.default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet spark_catalog.default.store (11) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet spark_catalog.default.store_returns (20) + : : : +- ReusedExchange (23) + : : +- ReusedExchange (26) + : :- * Project (53) + : : +- * BroadcastNestedLoopJoin Inner BuildLeft (52) + : : :- BroadcastExchange (43) + : : : +- * HashAggregate (42) + : : : +- Exchange (41) + : : : +- * HashAggregate (40) + : : : +- * Project (39) + : : : +- * BroadcastHashJoin Inner BuildRight (38) + : : : :- * ColumnarToRow (36) + : : : : +- Scan parquet spark_catalog.default.catalog_sales (35) + : : : +- ReusedExchange (37) + : : +- * HashAggregate (51) + : : +- Exchange (50) + : : +- * HashAggregate (49) + : : +- * Project (48) + : : +- * BroadcastHashJoin Inner BuildRight (47) + : : :- * ColumnarToRow (45) + : : : +- Scan parquet spark_catalog.default.catalog_returns (44) + : : +- ReusedExchange (46) + : +- * Project (83) + : +- * BroadcastHashJoin LeftOuter BuildRight (82) + : :- * HashAggregate (68) + : : +- Exchange (67) + : : +- * HashAggregate (66) + : : +- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet spark_catalog.default.web_sales (54) + : : : +- ReusedExchange (57) + : : +- BroadcastExchange (63) + : : +- * Filter (62) + : : +- * ColumnarToRow (61) + : : +- Scan parquet spark_catalog.default.web_page (60) + : +- BroadcastExchange (81) + : +- * HashAggregate (80) + : +- Exchange (79) + : +- * HashAggregate (78) + : +- * Project (77) + : +- * BroadcastHashJoin Inner BuildRight (76) + : :- * Project (74) + : : +- * BroadcastHashJoin Inner BuildRight (73) + : : :- * Filter (71) + : : : +- * ColumnarToRow (70) + : : : +- Scan parquet spark_catalog.default.web_returns (69) + : : +- ReusedExchange (72) + : +- ReusedExchange (75) + :- * HashAggregate (92) + : +- Exchange (91) + : +- * HashAggregate (90) + : +- * HashAggregate (89) + : +- ReusedExchange (88) + +- * HashAggregate (97) + +- Exchange (96) + +- * HashAggregate (95) + +- * HashAggregate (94) + +- ReusedExchange (93) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#4)] +PushedFilters: [IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4] +Condition : isnotnull(ss_store_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 1998-08-04)) AND (d_date#6 <= 1998-09-03)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#5] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3] +Input [5]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, ss_sold_date_sk#4, d_date_sk#5] + +(11) Scan parquet spark_catalog.default.store +Output [1]: [s_store_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#7] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#7] +Condition : isnotnull(s_store_sk#7) + +(14) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#1] +Right keys [1]: [s_store_sk#7] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] +Keys [1]: [s_store_sk#7] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] + +(18) Exchange +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Keys [1]: [s_store_sk#7] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] + +(20) Scan parquet spark_catalog.default.store_returns +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(sr_returned_date_sk#19)] +PushedFilters: [IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] + +(22) Filter [codegen id : 6] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#20] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] +Join type: Inner +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] + +(26) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#21] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] +Join type: Inner +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] + +(30) Exchange +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] + +(32) BroadcastExchange +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#7] +Right keys [1]: [s_store_sk#21] +Join type: LeftOuter +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [store channel AS channel#30, s_store_sk#7 AS id#31, sales#14, coalesce(returns#28, 0.00) AS returns#32, (profit#15 - coalesce(profit_loss#29, 0.00)) AS profit#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] + +(35) Scan parquet spark_catalog.default.catalog_sales +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#37)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] + +(37) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#38] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] +Join type: Inner +Join condition: None + +(39) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] + +(40) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] + +(41) Exchange +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(42) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] + +(43) BroadcastExchange +Input [3]: [cs_call_center_sk#34, sales#45, profit#46] +Arguments: IdentityBroadcastMode, [plan_id=7] + +(44) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#49)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 13] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] + +(46) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#50] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 13] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] + +(49) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#47, cr_net_loss#48] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] + +(50) Exchange +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] + +(51) HashAggregate +Input [2]: [sum#53, sum#54] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] + +(52) BroadcastNestedLoopJoin [codegen id : 14] +Join type: Inner +Join condition: None + +(53) Project [codegen id : 14] +Output [5]: [catalog channel AS channel#59, cs_call_center_sk#34 AS id#60, sales#45, returns#57, (profit#46 - profit_loss#58) AS profit#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] + +(54) Scan parquet spark_catalog.default.web_sales +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#65)] +PushedFilters: [IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] + +(56) Filter [codegen id : 17] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) + +(57) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#66] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 17] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] + +(60) Scan parquet spark_catalog.default.web_page +Output [1]: [wp_web_page_sk#67] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [1]: [wp_web_page_sk#67] + +(62) Filter [codegen id : 16] +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) + +(63) BroadcastExchange +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] + +(64) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 17] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] + +(66) HashAggregate [codegen id : 17] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] + +(67) Exchange +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] + +(69) Scan parquet spark_catalog.default.web_returns +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#79)] +PushedFilters: [IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] + +(71) Filter [codegen id : 20] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) + +(72) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#80] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] +Join type: Inner +Join condition: None + +(74) Project [codegen id : 20] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] + +(75) ReusedExchange [Reuses operator id: 63] +Output [1]: [wp_web_page_sk#81] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] +Join type: Inner +Join condition: None + +(77) Project [codegen id : 20] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] + +(78) HashAggregate [codegen id : 20] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] + +(79) Exchange +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] + +(81) BroadcastExchange +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] + +(82) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] +Join type: LeftOuter +Join condition: None + +(83) Project [codegen id : 22] +Output [5]: [web channel AS channel#90, wp_web_page_sk#67 AS id#91, sales#74, coalesce(returns#88, 0.00) AS returns#92, (profit#75 - coalesce(profit_loss#89, 0.00)) AS profit#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] + +(84) Union + +(85) HashAggregate [codegen id : 23] +Input [5]: [channel#30, id#31, sales#14, returns#32, profit#33] +Keys [2]: [channel#30, id#31] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#32), partial_sum(profit#33)] +Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99] +Results [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(86) Exchange +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(channel#30, id#31, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(87) HashAggregate [codegen id : 24] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [5]: [channel#30, id#31, cast(sum(sales#14)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(37,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111] + +(88) ReusedExchange [Reuses operator id: 86] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(89) HashAggregate [codegen id : 48] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [4]: [channel#30, sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] + +(90) HashAggregate [codegen id : 48] +Input [4]: [channel#30, sales#112, returns#113, profit#114] +Keys [1]: [channel#30] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(91) Exchange +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#30, 5), ENSURE_REQUIREMENTS, [plan_id=14] + +(92) HashAggregate [codegen id : 49] +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [1]: [channel#30] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#127, sum(returns#113)#128, sum(profit#114)#129] +Results [5]: [channel#30, null AS id#130, sum(sales#112)#127 AS sales#131, sum(returns#113)#128 AS returns#132, sum(profit#114)#129 AS profit#133] + +(93) ReusedExchange [Reuses operator id: 86] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] + +(94) HashAggregate [codegen id : 73] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [3]: [sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] + +(95) HashAggregate [codegen id : 73] +Input [3]: [sales#112, returns#113, profit#114] +Keys: [] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Results [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] + +(96) Exchange +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] + +(97) HashAggregate [codegen id : 74] +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Keys: [] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#146, sum(returns#113)#147, sum(profit#114)#148] +Results [5]: [null AS channel#149, null AS id#150, sum(sales#112)#146 AS sales#151, sum(returns#113)#147 AS returns#152, sum(profit#114)#148 AS profit#153] + +(98) Union + +(99) HashAggregate [codegen id : 75] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] + +(100) Exchange +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: hashpartitioning(channel#30, id#31, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(101) HashAggregate [codegen id : 76] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] + +(102) TakeOrderedAndProject +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#109, returns#110, profit#111] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..565a880398 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.native_iceberg_compat/simplified.txt @@ -0,0 +1,161 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (76) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (75) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (24) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (23) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (8) + Project [s_store_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #3 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #7 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_store_sk,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [s_store_sk] #5 + WholeStageCodegen (14) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + BroadcastNestedLoopJoin + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #9 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (22) + Project [wp_web_page_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #11 + WholeStageCodegen (17) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_web_page_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (16) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #14 + WholeStageCodegen (20) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_web_page_sk,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [wp_web_page_sk] #12 + WholeStageCodegen (49) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (48) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (74) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #16 + WholeStageCodegen (73) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/explain.txt new file mode 100644 index 0000000000..57fc823955 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/explain.txt @@ -0,0 +1,417 @@ +== Physical Plan == +TakeOrderedAndProject (73) ++- * Project (72) + +- * SortMergeJoin Inner (71) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- * SortMergeJoin LeftOuter (12) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet spark_catalog.default.date_dim (15) + : +- * Sort (46) + : +- * Filter (45) + : +- * HashAggregate (44) + : +- Exchange (43) + : +- * HashAggregate (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Project (38) + : : +- * Filter (37) + : : +- * SortMergeJoin LeftOuter (36) + : : :- * Sort (29) + : : : +- Exchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet spark_catalog.default.web_sales (25) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.web_returns (30) + : +- ReusedExchange (39) + +- * Sort (70) + +- * Filter (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * Filter (61) + : +- * SortMergeJoin LeftOuter (60) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.catalog_sales (49) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * ColumnarToRow (55) + : +- Scan parquet spark_catalog.default.catalog_returns (54) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(8) Filter [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(9) Project [codegen id : 3] +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(10) Exchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 6] +Left keys [2]: [ss_ticket_number#3, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join type: LeftOuter +Join condition: None + +(13) Filter [codegen id : 6] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(14) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] + +(15) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] + +(17) Filter [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(18) BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] + +(21) HashAggregate [codegen id : 6] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum#13, sum#14, sum#15] +Results [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] + +(22) Exchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(23) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum(ss_quantity#4)#19, sum(UnscaledValue(ss_wholesale_cost#5))#20, sum(UnscaledValue(ss_sales_price#6))#21] +Results [6]: [d_year#12 AS ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#19 AS ss_qty#23, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#20,17,2) AS ss_wc#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#21,17,2) AS ss_sp#25] + +(24) Sort [codegen id : 7] +Input [6]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(25) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#32)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] + +(27) Filter [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_item_sk#26) AND isnotnull(ws_bill_customer_sk#27)) + +(28) Exchange +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: hashpartitioning(ws_order_number#28, ws_item_sk#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 9] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: [ws_order_number#28 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(32) Filter [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Condition : (isnotnull(wr_order_number#34) AND isnotnull(wr_item_sk#33)) + +(33) Project [codegen id : 10] +Output [2]: [wr_item_sk#33, wr_order_number#34] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(34) Exchange +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: hashpartitioning(wr_order_number#34, wr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 11] +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: [wr_order_number#34 ASC NULLS FIRST, wr_item_sk#33 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 13] +Left keys [2]: [ws_order_number#28, ws_item_sk#26] +Right keys [2]: [wr_order_number#34, wr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(37) Filter [codegen id : 13] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] +Condition : isnull(wr_order_number#34) + +(38) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] + +(39) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#36, d_year#37] + +(40) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Input [8]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, d_date_sk#36, d_year#37] + +(42) HashAggregate [codegen id : 13] +Input [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [partial_sum(ws_quantity#29), partial_sum(UnscaledValue(ws_wholesale_cost#30)), partial_sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] + +(43) Exchange +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(44) HashAggregate [codegen id : 14] +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [sum(ws_quantity#29), sum(UnscaledValue(ws_wholesale_cost#30)), sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum(ws_quantity#29)#44, sum(UnscaledValue(ws_wholesale_cost#30))#45, sum(UnscaledValue(ws_sales_price#31))#46] +Results [6]: [d_year#37 AS ws_sold_year#47, ws_item_sk#26, ws_bill_customer_sk#27 AS ws_customer_sk#48, sum(ws_quantity#29)#44 AS ws_qty#49, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#30))#45,17,2) AS ws_wc#50, MakeDecimal(sum(UnscaledValue(ws_sales_price#31))#46,17,2) AS ws_sp#51] + +(45) Filter [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Condition : (coalesce(ws_qty#49, 0) > 0) + +(46) Sort [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Arguments: [ws_sold_year#47 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST, ws_customer_sk#48 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 15] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 15] +Output [9]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51] +Input [12]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] + +(49) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#58)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] + +(51) Filter [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Condition : (isnotnull(cs_item_sk#53) AND isnotnull(cs_bill_customer_sk#52)) + +(52) Exchange +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: hashpartitioning(cs_order_number#54, cs_item_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(53) Sort [codegen id : 17] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: [cs_order_number#54 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST], false, 0 + +(54) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(56) Filter [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Condition : (isnotnull(cr_order_number#60) AND isnotnull(cr_item_sk#59)) + +(57) Project [codegen id : 18] +Output [2]: [cr_item_sk#59, cr_order_number#60] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(58) Exchange +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: hashpartitioning(cr_order_number#60, cr_item_sk#59, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(59) Sort [codegen id : 19] +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: [cr_order_number#60 ASC NULLS FIRST, cr_item_sk#59 ASC NULLS FIRST], false, 0 + +(60) SortMergeJoin [codegen id : 21] +Left keys [2]: [cs_order_number#54, cs_item_sk#53] +Right keys [2]: [cr_order_number#60, cr_item_sk#59] +Join type: LeftOuter +Join condition: None + +(61) Filter [codegen id : 21] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] +Condition : isnull(cr_order_number#60) + +(62) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] + +(63) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#62, d_year#63] + +(64) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [cs_sold_date_sk#58] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Input [8]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, d_date_sk#62, d_year#63] + +(66) HashAggregate [codegen id : 21] +Input [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [partial_sum(cs_quantity#55), partial_sum(UnscaledValue(cs_wholesale_cost#56)), partial_sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum#64, sum#65, sum#66] +Results [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] + +(67) Exchange +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [sum(cs_quantity#55), sum(UnscaledValue(cs_wholesale_cost#56)), sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum(cs_quantity#55)#70, sum(UnscaledValue(cs_wholesale_cost#56))#71, sum(UnscaledValue(cs_sales_price#57))#72] +Results [6]: [d_year#63 AS cs_sold_year#73, cs_item_sk#53, cs_bill_customer_sk#52 AS cs_customer_sk#74, sum(cs_quantity#55)#70 AS cs_qty#75, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#56))#71,17,2) AS cs_wc#76, MakeDecimal(sum(UnscaledValue(cs_sales_price#57))#72,17,2) AS cs_sp#77] + +(69) Filter [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Condition : (coalesce(cs_qty#75, 0) > 0) + +(70) Sort [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Arguments: [cs_sold_year#73 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST, cs_customer_sk#74 ASC NULLS FIRST], false, 0 + +(71) SortMergeJoin [codegen id : 23] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74] +Join type: Inner +Join condition: None + +(72) Project [codegen id : 23] +Output [13]: [round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#49 + cs_qty#75), 1) as double)), 2) AS ratio#78, ss_qty#23 AS store_qty#79, ss_wc#24 AS store_wholesale_cost#80, ss_sp#25 AS store_sales_price#81, (coalesce(ws_qty#49, 0) + coalesce(cs_qty#75, 0)) AS other_chan_qty#82, (coalesce(ws_wc#50, 0.00) + coalesce(cs_wc#76, 0.00)) AS other_chan_wholesale_cost#83, (coalesce(ws_sp#51, 0.00) + coalesce(cs_sp#77, 0.00)) AS other_chan_sales_price#84, ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Input [15]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51, cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] + +(73) TakeOrderedAndProject +Input [13]: [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84, ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: 100, [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#23 DESC NULLS LAST, ss_wc#24 DESC NULLS LAST, ss_sp#25 DESC NULLS LAST, other_chan_qty#82 ASC NULLS FIRST, other_chan_wholesale_cost#83 ASC NULLS FIRST, other_chan_sales_price#84 ASC NULLS FIRST, ratio#78 ASC NULLS FIRST], [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/simplified.txt new file mode 100644 index 0000000000..a9b6bf9fd3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_datafusion/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ratio,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (23) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp,ss_sold_year,ss_item_sk,ss_customer_sk] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] + InputAdapter + WholeStageCodegen (15) + Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_sold_year,ss_item_sk,ss_customer_sk] + HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + Filter [sr_ticket_number] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #3 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [ws_sold_year,ws_item_sk,ws_customer_sk] + Filter [ws_qty] + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen (13) + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + Filter [wr_order_number] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #6 + WholeStageCodegen (8) + Filter [ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (11) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #7 + WholeStageCodegen (10) + Project [wr_item_sk,wr_order_number] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [cs_sold_year,cs_item_sk,cs_customer_sk] + Filter [cs_qty] + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen (21) + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + Filter [cr_order_number] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (17) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #9 + WholeStageCodegen (16) + Filter [cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (19) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #10 + WholeStageCodegen (18) + Project [cr_item_sk,cr_order_number] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..57fc823955 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/explain.txt @@ -0,0 +1,417 @@ +== Physical Plan == +TakeOrderedAndProject (73) ++- * Project (72) + +- * SortMergeJoin Inner (71) + :- * Project (48) + : +- * SortMergeJoin Inner (47) + : :- * Sort (24) + : : +- * HashAggregate (23) + : : +- Exchange (22) + : : +- * HashAggregate (21) + : : +- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- * SortMergeJoin LeftOuter (12) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet spark_catalog.default.date_dim (15) + : +- * Sort (46) + : +- * Filter (45) + : +- * HashAggregate (44) + : +- Exchange (43) + : +- * HashAggregate (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Project (38) + : : +- * Filter (37) + : : +- * SortMergeJoin LeftOuter (36) + : : :- * Sort (29) + : : : +- Exchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet spark_catalog.default.web_sales (25) + : : +- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet spark_catalog.default.web_returns (30) + : +- ReusedExchange (39) + +- * Sort (70) + +- * Filter (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * Filter (61) + : +- * SortMergeJoin LeftOuter (60) + : :- * Sort (53) + : : +- Exchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet spark_catalog.default.catalog_sales (49) + : +- * Sort (59) + : +- Exchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * ColumnarToRow (55) + : +- Scan parquet spark_catalog.default.catalog_returns (54) + +- ReusedExchange (63) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Arguments: [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(8) Filter [codegen id : 3] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(9) Project [codegen id : 3] +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] + +(10) Exchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 6] +Left keys [2]: [ss_ticket_number#3, ss_item_sk#1] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join type: LeftOuter +Join condition: None + +(13) Filter [codegen id : 6] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(14) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9] + +(15) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] + +(17) Filter [codegen id : 5] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(18) BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#11] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#11, d_year#12] + +(21) HashAggregate [codegen id : 6] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum#13, sum#14, sum#15] +Results [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] + +(22) Exchange +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Arguments: hashpartitioning(d_year#12, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(23) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ss_item_sk#1, ss_customer_sk#2, sum#16, sum#17, sum#18] +Keys [3]: [d_year#12, ss_item_sk#1, ss_customer_sk#2] +Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [3]: [sum(ss_quantity#4)#19, sum(UnscaledValue(ss_wholesale_cost#5))#20, sum(UnscaledValue(ss_sales_price#6))#21] +Results [6]: [d_year#12 AS ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#19 AS ss_qty#23, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#20,17,2) AS ss_wc#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#21,17,2) AS ss_sp#25] + +(24) Sort [codegen id : 7] +Input [6]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 + +(25) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#32)] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] + +(27) Filter [codegen id : 8] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_item_sk#26) AND isnotnull(ws_bill_customer_sk#27)) + +(28) Exchange +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: hashpartitioning(ws_order_number#28, ws_item_sk#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(29) Sort [codegen id : 9] +Input [7]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Arguments: [ws_order_number#28 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST], false, 0 + +(30) Scan parquet spark_catalog.default.web_returns +Output [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(32) Filter [codegen id : 10] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] +Condition : (isnotnull(wr_order_number#34) AND isnotnull(wr_item_sk#33)) + +(33) Project [codegen id : 10] +Output [2]: [wr_item_sk#33, wr_order_number#34] +Input [3]: [wr_item_sk#33, wr_order_number#34, wr_returned_date_sk#35] + +(34) Exchange +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: hashpartitioning(wr_order_number#34, wr_item_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(35) Sort [codegen id : 11] +Input [2]: [wr_item_sk#33, wr_order_number#34] +Arguments: [wr_order_number#34 ASC NULLS FIRST, wr_item_sk#33 ASC NULLS FIRST], false, 0 + +(36) SortMergeJoin [codegen id : 13] +Left keys [2]: [ws_order_number#28, ws_item_sk#26] +Right keys [2]: [wr_order_number#34, wr_item_sk#33] +Join type: LeftOuter +Join condition: None + +(37) Filter [codegen id : 13] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] +Condition : isnull(wr_order_number#34) + +(38) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32] +Input [9]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_order_number#28, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, wr_item_sk#33, wr_order_number#34] + +(39) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#36, d_year#37] + +(40) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#36] +Join type: Inner +Join condition: None + +(41) Project [codegen id : 13] +Output [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Input [8]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, ws_sold_date_sk#32, d_date_sk#36, d_year#37] + +(42) HashAggregate [codegen id : 13] +Input [6]: [ws_item_sk#26, ws_bill_customer_sk#27, ws_quantity#29, ws_wholesale_cost#30, ws_sales_price#31, d_year#37] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [partial_sum(ws_quantity#29), partial_sum(UnscaledValue(ws_wholesale_cost#30)), partial_sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] + +(43) Exchange +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(44) HashAggregate [codegen id : 14] +Input [6]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27, sum#41, sum#42, sum#43] +Keys [3]: [d_year#37, ws_item_sk#26, ws_bill_customer_sk#27] +Functions [3]: [sum(ws_quantity#29), sum(UnscaledValue(ws_wholesale_cost#30)), sum(UnscaledValue(ws_sales_price#31))] +Aggregate Attributes [3]: [sum(ws_quantity#29)#44, sum(UnscaledValue(ws_wholesale_cost#30))#45, sum(UnscaledValue(ws_sales_price#31))#46] +Results [6]: [d_year#37 AS ws_sold_year#47, ws_item_sk#26, ws_bill_customer_sk#27 AS ws_customer_sk#48, sum(ws_quantity#29)#44 AS ws_qty#49, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#30))#45,17,2) AS ws_wc#50, MakeDecimal(sum(UnscaledValue(ws_sales_price#31))#46,17,2) AS ws_sp#51] + +(45) Filter [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Condition : (coalesce(ws_qty#49, 0) > 0) + +(46) Sort [codegen id : 14] +Input [6]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] +Arguments: [ws_sold_year#47 ASC NULLS FIRST, ws_item_sk#26 ASC NULLS FIRST, ws_customer_sk#48 ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin [codegen id : 15] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48] +Join type: Inner +Join condition: None + +(48) Project [codegen id : 15] +Output [9]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51] +Input [12]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_sold_year#47, ws_item_sk#26, ws_customer_sk#48, ws_qty#49, ws_wc#50, ws_sp#51] + +(49) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#58)] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] + +(51) Filter [codegen id : 16] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Condition : (isnotnull(cs_item_sk#53) AND isnotnull(cs_bill_customer_sk#52)) + +(52) Exchange +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: hashpartitioning(cs_order_number#54, cs_item_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(53) Sort [codegen id : 17] +Input [7]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Arguments: [cs_order_number#54 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST], false, 0 + +(54) Scan parquet spark_catalog.default.catalog_returns +Output [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(56) Filter [codegen id : 18] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] +Condition : (isnotnull(cr_order_number#60) AND isnotnull(cr_item_sk#59)) + +(57) Project [codegen id : 18] +Output [2]: [cr_item_sk#59, cr_order_number#60] +Input [3]: [cr_item_sk#59, cr_order_number#60, cr_returned_date_sk#61] + +(58) Exchange +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: hashpartitioning(cr_order_number#60, cr_item_sk#59, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(59) Sort [codegen id : 19] +Input [2]: [cr_item_sk#59, cr_order_number#60] +Arguments: [cr_order_number#60 ASC NULLS FIRST, cr_item_sk#59 ASC NULLS FIRST], false, 0 + +(60) SortMergeJoin [codegen id : 21] +Left keys [2]: [cs_order_number#54, cs_item_sk#53] +Right keys [2]: [cr_order_number#60, cr_item_sk#59] +Join type: LeftOuter +Join condition: None + +(61) Filter [codegen id : 21] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] +Condition : isnull(cr_order_number#60) + +(62) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58] +Input [9]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_order_number#54, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, cr_item_sk#59, cr_order_number#60] + +(63) ReusedExchange [Reuses operator id: 18] +Output [2]: [d_date_sk#62, d_year#63] + +(64) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [cs_sold_date_sk#58] +Right keys [1]: [d_date_sk#62] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 21] +Output [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Input [8]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, cs_sold_date_sk#58, d_date_sk#62, d_year#63] + +(66) HashAggregate [codegen id : 21] +Input [6]: [cs_bill_customer_sk#52, cs_item_sk#53, cs_quantity#55, cs_wholesale_cost#56, cs_sales_price#57, d_year#63] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [partial_sum(cs_quantity#55), partial_sum(UnscaledValue(cs_wholesale_cost#56)), partial_sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum#64, sum#65, sum#66] +Results [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] + +(67) Exchange +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=10] + +(68) HashAggregate [codegen id : 22] +Input [6]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52, sum#67, sum#68, sum#69] +Keys [3]: [d_year#63, cs_item_sk#53, cs_bill_customer_sk#52] +Functions [3]: [sum(cs_quantity#55), sum(UnscaledValue(cs_wholesale_cost#56)), sum(UnscaledValue(cs_sales_price#57))] +Aggregate Attributes [3]: [sum(cs_quantity#55)#70, sum(UnscaledValue(cs_wholesale_cost#56))#71, sum(UnscaledValue(cs_sales_price#57))#72] +Results [6]: [d_year#63 AS cs_sold_year#73, cs_item_sk#53, cs_bill_customer_sk#52 AS cs_customer_sk#74, sum(cs_quantity#55)#70 AS cs_qty#75, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#56))#71,17,2) AS cs_wc#76, MakeDecimal(sum(UnscaledValue(cs_sales_price#57))#72,17,2) AS cs_sp#77] + +(69) Filter [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Condition : (coalesce(cs_qty#75, 0) > 0) + +(70) Sort [codegen id : 22] +Input [6]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] +Arguments: [cs_sold_year#73 ASC NULLS FIRST, cs_item_sk#53 ASC NULLS FIRST, cs_customer_sk#74 ASC NULLS FIRST], false, 0 + +(71) SortMergeJoin [codegen id : 23] +Left keys [3]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74] +Join type: Inner +Join condition: None + +(72) Project [codegen id : 23] +Output [13]: [round((cast(ss_qty#23 as double) / cast(coalesce((ws_qty#49 + cs_qty#75), 1) as double)), 2) AS ratio#78, ss_qty#23 AS store_qty#79, ss_wc#24 AS store_wholesale_cost#80, ss_sp#25 AS store_sales_price#81, (coalesce(ws_qty#49, 0) + coalesce(cs_qty#75, 0)) AS other_chan_qty#82, (coalesce(ws_wc#50, 0.00) + coalesce(cs_wc#76, 0.00)) AS other_chan_wholesale_cost#83, (coalesce(ws_sp#51, 0.00) + coalesce(cs_sp#77, 0.00)) AS other_chan_sales_price#84, ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Input [15]: [ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25, ws_qty#49, ws_wc#50, ws_sp#51, cs_sold_year#73, cs_item_sk#53, cs_customer_sk#74, cs_qty#75, cs_wc#76, cs_sp#77] + +(73) TakeOrderedAndProject +Input [13]: [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84, ss_sold_year#22, ss_item_sk#1, ss_customer_sk#2, ss_qty#23, ss_wc#24, ss_sp#25] +Arguments: 100, [ss_sold_year#22 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#23 DESC NULLS LAST, ss_wc#24 DESC NULLS LAST, ss_sp#25 DESC NULLS LAST, other_chan_qty#82 ASC NULLS FIRST, other_chan_wholesale_cost#83 ASC NULLS FIRST, other_chan_sales_price#84 ASC NULLS FIRST, ratio#78 ASC NULLS FIRST], [ratio#78, store_qty#79, store_wholesale_cost#80, store_sales_price#81, other_chan_qty#82, other_chan_wholesale_cost#83, other_chan_sales_price#84] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..a9b6bf9fd3 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.native_iceberg_compat/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ratio,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (23) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp,ss_sold_year,ss_item_sk,ss_customer_sk] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] + InputAdapter + WholeStageCodegen (15) + Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + SortMergeJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] + InputAdapter + WholeStageCodegen (7) + Sort [ss_sold_year,ss_item_sk,ss_customer_sk] + HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(ss_quantity),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen (6) + HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + Filter [sr_ticket_number] + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + InputAdapter + WholeStageCodegen (2) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #2 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #3 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_returned_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (14) + Sort [ws_sold_year,ws_item_sk,ws_customer_sk] + Filter [ws_qty] + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(ws_quantity),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen (13) + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + Filter [wr_order_number] + SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + InputAdapter + WholeStageCodegen (9) + Sort [ws_order_number,ws_item_sk] + InputAdapter + Exchange [ws_order_number,ws_item_sk] #6 + WholeStageCodegen (8) + Filter [ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (11) + Sort [wr_order_number,wr_item_sk] + InputAdapter + Exchange [wr_order_number,wr_item_sk] #7 + WholeStageCodegen (10) + Project [wr_item_sk,wr_order_number] + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + WholeStageCodegen (22) + Sort [cs_sold_year,cs_item_sk,cs_customer_sk] + Filter [cs_qty] + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cs_quantity),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen (21) + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + Filter [cr_order_number] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + InputAdapter + WholeStageCodegen (17) + Sort [cs_order_number,cs_item_sk] + InputAdapter + Exchange [cs_order_number,cs_item_sk] #9 + WholeStageCodegen (16) + Filter [cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (19) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #10 + WholeStageCodegen (18) + Project [cr_item_sk,cr_order_number] + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/explain.txt new file mode 100644 index 0000000000..866e41848c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/explain.txt @@ -0,0 +1,716 @@ +== Physical Plan == +TakeOrderedAndProject (124) ++- * HashAggregate (123) + +- Exchange (122) + +- * HashAggregate (121) + +- Union (120) + :- * HashAggregate (109) + : +- Exchange (108) + : +- * HashAggregate (107) + : +- Union (106) + : :- * HashAggregate (43) + : : +- Exchange (42) + : : +- * HashAggregate (41) + : : +- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) + : : :- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Project (26) + : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : :- * Project (20) + : : : : : +- * BroadcastHashJoin Inner BuildRight (19) + : : : : : :- * Project (13) + : : : : : : +- * SortMergeJoin LeftOuter (12) + : : : : : : :- * Sort (5) + : : : : : : : +- Exchange (4) + : : : : : : : +- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- * Sort (11) + : : : : : : +- Exchange (10) + : : : : : : +- * Project (9) + : : : : : : +- * Filter (8) + : : : : : : +- * ColumnarToRow (7) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : : : : +- BroadcastExchange (18) + : : : : : +- * Project (17) + : : : : : +- * Filter (16) + : : : : : +- * ColumnarToRow (15) + : : : : : +- Scan parquet spark_catalog.default.date_dim (14) + : : : : +- BroadcastExchange (24) + : : : : +- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet spark_catalog.default.store (21) + : : : +- BroadcastExchange (31) + : : : +- * Project (30) + : : : +- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet spark_catalog.default.item (27) + : : +- BroadcastExchange (38) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.promotion (34) + : :- * HashAggregate (74) + : : +- Exchange (73) + : : +- * HashAggregate (72) + : : +- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (68) + : : : +- * BroadcastHashJoin Inner BuildRight (67) + : : : :- * Project (65) + : : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : : :- * Project (59) + : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : :- * Project (56) + : : : : : : +- * SortMergeJoin LeftOuter (55) + : : : : : : :- * Sort (48) + : : : : : : : +- Exchange (47) + : : : : : : : +- * Filter (46) + : : : : : : : +- * ColumnarToRow (45) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (44) + : : : : : : +- * Sort (54) + : : : : : : +- Exchange (53) + : : : : : : +- * Project (52) + : : : : : : +- * Filter (51) + : : : : : : +- * ColumnarToRow (50) + : : : : : : +- Scan parquet spark_catalog.default.catalog_returns (49) + : : : : : +- ReusedExchange (57) + : : : : +- BroadcastExchange (63) + : : : : +- * Filter (62) + : : : : +- * ColumnarToRow (61) + : : : : +- Scan parquet spark_catalog.default.catalog_page (60) + : : : +- ReusedExchange (66) + : : +- ReusedExchange (69) + : +- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Project (99) + : : +- * BroadcastHashJoin Inner BuildRight (98) + : : :- * Project (96) + : : : +- * BroadcastHashJoin Inner BuildRight (95) + : : : :- * Project (90) + : : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : : :- * Project (87) + : : : : : +- * SortMergeJoin LeftOuter (86) + : : : : : :- * Sort (79) + : : : : : : +- Exchange (78) + : : : : : : +- * Filter (77) + : : : : : : +- * ColumnarToRow (76) + : : : : : : +- Scan parquet spark_catalog.default.web_sales (75) + : : : : : +- * Sort (85) + : : : : : +- Exchange (84) + : : : : : +- * Project (83) + : : : : : +- * Filter (82) + : : : : : +- * ColumnarToRow (81) + : : : : : +- Scan parquet spark_catalog.default.web_returns (80) + : : : : +- ReusedExchange (88) + : : : +- BroadcastExchange (94) + : : : +- * Filter (93) + : : : +- * ColumnarToRow (92) + : : : +- Scan parquet spark_catalog.default.web_site (91) + : : +- ReusedExchange (97) + : +- ReusedExchange (100) + :- * HashAggregate (114) + : +- Exchange (113) + : +- * HashAggregate (112) + : +- * HashAggregate (111) + : +- ReusedExchange (110) + +- * HashAggregate (119) + +- Exchange (118) + +- * HashAggregate (117) + +- * HashAggregate (116) + +- ReusedExchange (115) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(8) Filter [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(9) Project [codegen id : 3] +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(10) Exchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join type: LeftOuter +Join condition: None + +(13) Project [codegen id : 9] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 1998-08-04)) AND (d_date#14 <= 1998-09-03)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 5] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(21) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_store_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] + +(23) Filter [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(24) BroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] + +(27) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_current_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] + +(29) Filter [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(30) Project [codegen id : 7] +Output [1]: [i_item_sk#17] +Input [2]: [i_item_sk#17, i_current_price#18] + +(31) BroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] + +(34) Scan parquet spark_catalog.default.promotion +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(36) Filter [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(37) Project [codegen id : 8] +Output [1]: [p_promo_sk#19] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(38) BroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_promo_sk#3] +Right keys [1]: [p_promo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] + +(41) HashAggregate [codegen id : 9] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] + +(42) Exchange +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(43) HashAggregate [codegen id : 10] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33] +Results [5]: [store channel AS channel#34, concat(store, s_store_id#16) AS id#35, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#36, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32 AS returns#37, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33 AS profit#38] + +(44) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(46) Filter [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) + +(47) Exchange +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(48) Sort [codegen id : 12] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 + +(49) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(51) Filter [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) + +(52) Project [codegen id : 13] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(53) Exchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(54) Sort [codegen id : 14] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 19] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] +Join type: LeftOuter +Join condition: None + +(56) Project [codegen id : 19] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] + +(57) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#51] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 19] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#51] + +(60) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(62) Filter [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Condition : isnotnull(cp_catalog_page_sk#52) + +(63) BroadcastExchange +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(64) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#52] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 19] +Output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(66) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#54] + +(67) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#54] +Join type: Inner +Join condition: None + +(68) Project [codegen id : 19] +Output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, i_item_sk#54] + +(69) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#55] + +(70) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(71) Project [codegen id : 19] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, p_promo_sk#55] + +(72) HashAggregate [codegen id : 19] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] + +(73) Exchange +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#53, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) HashAggregate [codegen id : 20] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68] +Results [5]: [catalog channel AS channel#69, concat(catalog_page, cp_catalog_page_id#53) AS id#70, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#71, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#72, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68 AS profit#73] + +(75) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#80)] +PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] + +(77) Filter [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) + +(78) Exchange +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(79) Sort [codegen id : 22] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 + +(80) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(82) Filter [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) + +(83) Project [codegen id : 23] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(84) Exchange +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(85) Sort [codegen id : 24] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin [codegen id : 29] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] +Join type: LeftOuter +Join condition: None + +(87) Project [codegen id : 29] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] + +(88) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#86] + +(89) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#86] +Join type: Inner +Join condition: None + +(90) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#86] + +(91) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#87, web_site_id#88] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] + +(93) Filter [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] +Condition : isnotnull(web_site_sk#87) + +(94) BroadcastExchange +Input [2]: [web_site_sk#87, web_site_id#88] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#87] +Join type: Inner +Join condition: None + +(96) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#87, web_site_id#88] + +(97) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#89] + +(98) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#89] +Join type: Inner +Join condition: None + +(99) Project [codegen id : 29] +Output [6]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [8]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, i_item_sk#89] + +(100) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#90] + +(101) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#90] +Join type: Inner +Join condition: None + +(102) Project [codegen id : 29] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [7]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, p_promo_sk#90] + +(103) HashAggregate [codegen id : 29] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Keys [1]: [web_site_id#88] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] + +(104) Exchange +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#88, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(105) HashAggregate [codegen id : 30] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#88] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103] +Results [5]: [web channel AS channel#104, concat(web_site, web_site_id#88) AS id#105, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#106, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#107, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103 AS profit#108] + +(106) Union + +(107) HashAggregate [codegen id : 31] +Input [5]: [channel#34, id#35, sales#36, returns#37, profit#38] +Keys [2]: [channel#34, id#35] +Functions [3]: [partial_sum(sales#36), partial_sum(returns#37), partial_sum(profit#38)] +Aggregate Attributes [6]: [sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114] +Results [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] + +(108) Exchange +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(109) HashAggregate [codegen id : 32] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [5]: [channel#34, id#35, cast(sum(sales#36)#121 as decimal(37,2)) AS sales#124, cast(sum(returns#37)#122 as decimal(38,2)) AS returns#125, cast(sum(profit#38)#123 as decimal(38,2)) AS profit#126] + +(110) ReusedExchange [Reuses operator id: 108] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] + +(111) HashAggregate [codegen id : 64] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [4]: [channel#34, sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] + +(112) HashAggregate [codegen id : 64] +Input [4]: [channel#34, sales#127, returns#128, profit#129] +Keys [1]: [channel#34] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] + +(113) Exchange +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(114) HashAggregate [codegen id : 65] +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [1]: [channel#34] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#142, sum(returns#128)#143, sum(profit#129)#144] +Results [5]: [channel#34, null AS id#145, sum(sales#127)#142 AS sales#146, sum(returns#128)#143 AS returns#147, sum(profit#129)#144 AS profit#148] + +(115) ReusedExchange [Reuses operator id: 108] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] + +(116) HashAggregate [codegen id : 97] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [3]: [sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] + +(117) HashAggregate [codegen id : 97] +Input [3]: [sales#127, returns#128, profit#129] +Keys: [] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] +Results [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] + +(118) Exchange +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] + +(119) HashAggregate [codegen id : 98] +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] +Keys: [] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#161, sum(returns#128)#162, sum(profit#129)#163] +Results [5]: [null AS channel#164, null AS id#165, sum(sales#127)#161 AS sales#166, sum(returns#128)#162 AS returns#167, sum(profit#129)#163 AS profit#168] + +(120) Union + +(121) HashAggregate [codegen id : 99] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] + +(122) Exchange +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: hashpartitioning(channel#34, id#35, sales#124, returns#125, profit#126, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(123) HashAggregate [codegen id : 100] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] + +(124) TakeOrderedAndProject +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#124, returns#125, profit#126] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..011fa1eb28 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_datafusion/simplified.txt @@ -0,0 +1,203 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (100) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (99) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (32) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (31) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [s_store_id] #3 + WholeStageCodegen (9) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #4 + WholeStageCodegen (1) + Filter [ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #5 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #10 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #11 + WholeStageCodegen (11) + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (13) + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + InputAdapter + ReusedExchange [i_item_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (30) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [web_site_id] #14 + WholeStageCodegen (29) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss] + SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + InputAdapter + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #15 + WholeStageCodegen (21) + Filter [ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #16 + WholeStageCodegen (23) + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (26) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (65) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #18 + WholeStageCodegen (64) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (98) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #19 + WholeStageCodegen (97) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..866e41848c --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/explain.txt @@ -0,0 +1,716 @@ +== Physical Plan == +TakeOrderedAndProject (124) ++- * HashAggregate (123) + +- Exchange (122) + +- * HashAggregate (121) + +- Union (120) + :- * HashAggregate (109) + : +- Exchange (108) + : +- * HashAggregate (107) + : +- Union (106) + : :- * HashAggregate (43) + : : +- Exchange (42) + : : +- * HashAggregate (41) + : : +- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) + : : :- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Project (26) + : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : :- * Project (20) + : : : : : +- * BroadcastHashJoin Inner BuildRight (19) + : : : : : :- * Project (13) + : : : : : : +- * SortMergeJoin LeftOuter (12) + : : : : : : :- * Sort (5) + : : : : : : : +- Exchange (4) + : : : : : : : +- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : : : : +- * Sort (11) + : : : : : : +- Exchange (10) + : : : : : : +- * Project (9) + : : : : : : +- * Filter (8) + : : : : : : +- * ColumnarToRow (7) + : : : : : : +- Scan parquet spark_catalog.default.store_returns (6) + : : : : : +- BroadcastExchange (18) + : : : : : +- * Project (17) + : : : : : +- * Filter (16) + : : : : : +- * ColumnarToRow (15) + : : : : : +- Scan parquet spark_catalog.default.date_dim (14) + : : : : +- BroadcastExchange (24) + : : : : +- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet spark_catalog.default.store (21) + : : : +- BroadcastExchange (31) + : : : +- * Project (30) + : : : +- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet spark_catalog.default.item (27) + : : +- BroadcastExchange (38) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet spark_catalog.default.promotion (34) + : :- * HashAggregate (74) + : : +- Exchange (73) + : : +- * HashAggregate (72) + : : +- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (68) + : : : +- * BroadcastHashJoin Inner BuildRight (67) + : : : :- * Project (65) + : : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : : :- * Project (59) + : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : :- * Project (56) + : : : : : : +- * SortMergeJoin LeftOuter (55) + : : : : : : :- * Sort (48) + : : : : : : : +- Exchange (47) + : : : : : : : +- * Filter (46) + : : : : : : : +- * ColumnarToRow (45) + : : : : : : : +- Scan parquet spark_catalog.default.catalog_sales (44) + : : : : : : +- * Sort (54) + : : : : : : +- Exchange (53) + : : : : : : +- * Project (52) + : : : : : : +- * Filter (51) + : : : : : : +- * ColumnarToRow (50) + : : : : : : +- Scan parquet spark_catalog.default.catalog_returns (49) + : : : : : +- ReusedExchange (57) + : : : : +- BroadcastExchange (63) + : : : : +- * Filter (62) + : : : : +- * ColumnarToRow (61) + : : : : +- Scan parquet spark_catalog.default.catalog_page (60) + : : : +- ReusedExchange (66) + : : +- ReusedExchange (69) + : +- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Project (99) + : : +- * BroadcastHashJoin Inner BuildRight (98) + : : :- * Project (96) + : : : +- * BroadcastHashJoin Inner BuildRight (95) + : : : :- * Project (90) + : : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : : :- * Project (87) + : : : : : +- * SortMergeJoin LeftOuter (86) + : : : : : :- * Sort (79) + : : : : : : +- Exchange (78) + : : : : : : +- * Filter (77) + : : : : : : +- * ColumnarToRow (76) + : : : : : : +- Scan parquet spark_catalog.default.web_sales (75) + : : : : : +- * Sort (85) + : : : : : +- Exchange (84) + : : : : : +- * Project (83) + : : : : : +- * Filter (82) + : : : : : +- * ColumnarToRow (81) + : : : : : +- Scan parquet spark_catalog.default.web_returns (80) + : : : : +- ReusedExchange (88) + : : : +- BroadcastExchange (94) + : : : +- * Filter (93) + : : : +- * ColumnarToRow (92) + : : : +- Scan parquet spark_catalog.default.web_site (91) + : : +- ReusedExchange (97) + : +- ReusedExchange (100) + :- * HashAggregate (114) + : +- Exchange (113) + : +- * HashAggregate (112) + : +- * HashAggregate (111) + : +- ReusedExchange (110) + +- * HashAggregate (119) + +- Exchange (118) + +- * HashAggregate (117) + +- * HashAggregate (116) + +- ReusedExchange (115) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#7)] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] + +(3) Filter [codegen id : 1] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_promo_sk#3)) + +(4) Exchange +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] + +(5) Sort [codegen id : 2] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] +Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 + +(6) Scan parquet spark_catalog.default.store_returns +Output [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(8) Filter [codegen id : 3] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(9) Project [codegen id : 3] +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Input [5]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11, sr_returned_date_sk#12] + +(10) Exchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] + +(11) Sort [codegen id : 4] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 + +(12) SortMergeJoin [codegen id : 9] +Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join type: LeftOuter +Join condition: None + +(13) Project [codegen id : 9] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(14) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 5] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 1998-08-04)) AND (d_date#14 <= 1998-09-03)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 5] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#13] +Join type: Inner +Join condition: None + +(20) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(21) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#15, s_store_id#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] + +(23) Filter [codegen id : 6] +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) + +(24) BroadcastExchange +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#15] +Join type: Inner +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_sk#15, s_store_id#16] + +(27) Scan parquet spark_catalog.default.item +Output [2]: [i_item_sk#17, i_current_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] + +(29) Filter [codegen id : 7] +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) + +(30) Project [codegen id : 7] +Output [1]: [i_item_sk#17] +Input [2]: [i_item_sk#17, i_current_price#18] + +(31) BroadcastExchange +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#17] +Join type: Inner +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, i_item_sk#17] + +(34) Scan parquet spark_catalog.default.promotion +Output [2]: [p_promo_sk#19, p_channel_tv#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(36) Filter [codegen id : 8] +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) + +(37) Project [codegen id : 8] +Output [1]: [p_promo_sk#19] +Input [2]: [p_promo_sk#19, p_channel_tv#20] + +(38) BroadcastExchange +Input [1]: [p_promo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_promo_sk#3] +Right keys [1]: [p_promo_sk#19] +Join type: Inner +Join condition: None + +(40) Project [codegen id : 9] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16, p_promo_sk#19] + +(41) HashAggregate [codegen id : 9] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#10, sr_net_loss#11, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] + +(42) Exchange +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(43) HashAggregate [codegen id : 10] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33] +Results [5]: [store channel AS channel#34, concat(store, s_store_id#16) AS id#35, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#36, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#32 AS returns#37, sum((ss_net_profit#6 - coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00)))#33 AS profit#38] + +(44) Scan parquet spark_catalog.default.catalog_sales +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#45)] +PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] + +(46) Filter [codegen id : 11] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) + +(47) Exchange +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=8] + +(48) Sort [codegen id : 12] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 + +(49) Scan parquet spark_catalog.default.catalog_returns +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(51) Filter [codegen id : 13] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) + +(52) Project [codegen id : 13] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] + +(53) Exchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=9] + +(54) Sort [codegen id : 14] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 + +(55) SortMergeJoin [codegen id : 19] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] +Join type: LeftOuter +Join condition: None + +(56) Project [codegen id : 19] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] + +(57) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#51] + +(58) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] +Join type: Inner +Join condition: None + +(59) Project [codegen id : 19] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#51] + +(60) Scan parquet spark_catalog.default.catalog_page +Output [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(61) ColumnarToRow [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(62) Filter [codegen id : 16] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Condition : isnotnull(cp_catalog_page_sk#52) + +(63) BroadcastExchange +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] + +(64) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#52] +Join type: Inner +Join condition: None + +(65) Project [codegen id : 19] +Output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#52, cp_catalog_page_id#53] + +(66) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#54] + +(67) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#54] +Join type: Inner +Join condition: None + +(68) Project [codegen id : 19] +Output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, i_item_sk#54] + +(69) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#55] + +(70) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#55] +Join type: Inner +Join condition: None + +(71) Project [codegen id : 19] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, p_promo_sk#55] + +(72) HashAggregate [codegen id : 19] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] + +(73) Exchange +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#53, 5), ENSURE_REQUIREMENTS, [plan_id=11] + +(74) HashAggregate [codegen id : 20] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68] +Results [5]: [catalog channel AS channel#69, concat(catalog_page, cp_catalog_page_id#53) AS id#70, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#71, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#72, sum((cs_net_profit#44 - coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00)))#68 AS profit#73] + +(75) Scan parquet spark_catalog.default.web_sales +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#80)] +PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] + +(77) Filter [codegen id : 21] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) + +(78) Exchange +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=12] + +(79) Sort [codegen id : 22] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 + +(80) Scan parquet spark_catalog.default.web_returns +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(82) Filter [codegen id : 23] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) + +(83) Project [codegen id : 23] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] + +(84) Exchange +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=13] + +(85) Sort [codegen id : 24] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin [codegen id : 29] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] +Join type: LeftOuter +Join condition: None + +(87) Project [codegen id : 29] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] + +(88) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#86] + +(89) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#86] +Join type: Inner +Join condition: None + +(90) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#86] + +(91) Scan parquet spark_catalog.default.web_site +Output [2]: [web_site_sk#87, web_site_id#88] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] + +(93) Filter [codegen id : 26] +Input [2]: [web_site_sk#87, web_site_id#88] +Condition : isnotnull(web_site_sk#87) + +(94) BroadcastExchange +Input [2]: [web_site_sk#87, web_site_id#88] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] + +(95) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#87] +Join type: Inner +Join condition: None + +(96) Project [codegen id : 29] +Output [7]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#87, web_site_id#88] + +(97) ReusedExchange [Reuses operator id: 31] +Output [1]: [i_item_sk#89] + +(98) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#89] +Join type: Inner +Join condition: None + +(99) Project [codegen id : 29] +Output [6]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [8]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, i_item_sk#89] + +(100) ReusedExchange [Reuses operator id: 38] +Output [1]: [p_promo_sk#90] + +(101) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#90] +Join type: Inner +Join condition: None + +(102) Project [codegen id : 29] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [7]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, p_promo_sk#90] + +(103) HashAggregate [codegen id : 29] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Keys [1]: [web_site_id#88] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] + +(104) Exchange +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#88, 5), ENSURE_REQUIREMENTS, [plan_id=15] + +(105) HashAggregate [codegen id : 30] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#88] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103] +Results [5]: [web channel AS channel#104, concat(web_site, web_site_id#88) AS id#105, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#106, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#107, sum((ws_net_profit#79 - coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00)))#103 AS profit#108] + +(106) Union + +(107) HashAggregate [codegen id : 31] +Input [5]: [channel#34, id#35, sales#36, returns#37, profit#38] +Keys [2]: [channel#34, id#35] +Functions [3]: [partial_sum(sales#36), partial_sum(returns#37), partial_sum(profit#38)] +Aggregate Attributes [6]: [sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114] +Results [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] + +(108) Exchange +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [plan_id=16] + +(109) HashAggregate [codegen id : 32] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [5]: [channel#34, id#35, cast(sum(sales#36)#121 as decimal(37,2)) AS sales#124, cast(sum(returns#37)#122 as decimal(38,2)) AS returns#125, cast(sum(profit#38)#123 as decimal(38,2)) AS profit#126] + +(110) ReusedExchange [Reuses operator id: 108] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] + +(111) HashAggregate [codegen id : 64] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [4]: [channel#34, sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] + +(112) HashAggregate [codegen id : 64] +Input [4]: [channel#34, sales#127, returns#128, profit#129] +Keys [1]: [channel#34] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] + +(113) Exchange +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [plan_id=17] + +(114) HashAggregate [codegen id : 65] +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [1]: [channel#34] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#142, sum(returns#128)#143, sum(profit#129)#144] +Results [5]: [channel#34, null AS id#145, sum(sales#127)#142 AS sales#146, sum(returns#128)#143 AS returns#147, sum(profit#129)#144 AS profit#148] + +(115) ReusedExchange [Reuses operator id: 108] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] + +(116) HashAggregate [codegen id : 97] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [3]: [sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] + +(117) HashAggregate [codegen id : 97] +Input [3]: [sales#127, returns#128, profit#129] +Keys: [] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] +Results [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] + +(118) Exchange +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] + +(119) HashAggregate [codegen id : 98] +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] +Keys: [] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#161, sum(returns#128)#162, sum(profit#129)#163] +Results [5]: [null AS channel#164, null AS id#165, sum(sales#127)#161 AS sales#166, sum(returns#128)#162 AS returns#167, sum(profit#129)#163 AS profit#168] + +(120) Union + +(121) HashAggregate [codegen id : 99] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] + +(122) Exchange +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: hashpartitioning(channel#34, id#35, sales#124, returns#125, profit#126, 5), ENSURE_REQUIREMENTS, [plan_id=19] + +(123) HashAggregate [codegen id : 100] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] + +(124) TakeOrderedAndProject +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#124, returns#125, profit#126] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..011fa1eb28 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.native_iceberg_compat/simplified.txt @@ -0,0 +1,203 @@ +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (100) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Exchange [channel,id,sales,returns,profit] #1 + WholeStageCodegen (99) + HashAggregate [channel,id,sales,returns,profit] + InputAdapter + Union + WholeStageCodegen (32) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel,id] #2 + WholeStageCodegen (31) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (10) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum((ss_net_profit - coalesce(cast(sr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [s_store_id] #3 + WholeStageCodegen (9) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,sr_return_amt,sr_net_loss] + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + InputAdapter + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #4 + WholeStageCodegen (1) + Filter [ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #5 + WholeStageCodegen (3) + Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss,sr_returned_date_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum((cs_net_profit - coalesce(cast(cr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #10 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk,cr_return_amount,cr_net_loss] + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + InputAdapter + WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #11 + WholeStageCodegen (11) + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_sales [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + InputAdapter + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (13) + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss,cr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + InputAdapter + ReusedExchange [i_item_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (30) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum((ws_net_profit - coalesce(cast(wr_net_loss as decimal(12,2)), 0.00))),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [web_site_id] #14 + WholeStageCodegen (29) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,wr_return_amt,wr_net_loss] + SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + InputAdapter + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #15 + WholeStageCodegen (21) + Filter [ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit,ws_sold_date_sk] + InputAdapter + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #16 + WholeStageCodegen (23) + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss,wr_returned_date_sk] + InputAdapter + ReusedExchange [d_date_sk] #6 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (26) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_site [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] #8 + InputAdapter + ReusedExchange [p_promo_sk] #9 + WholeStageCodegen (65) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #18 + WholeStageCodegen (64) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (98) + HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange #19 + WholeStageCodegen (97) + HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/explain.txt new file mode 100644 index 0000000000..348af132d6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/explain.txt @@ -0,0 +1,230 @@ +== Physical Plan == +TakeOrderedAndProject (38) ++- * Project (37) + +- Window (36) + +- * Sort (35) + +- Exchange (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- Union (30) + :- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * HashAggregate (26) + +- ReusedExchange (25) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#1, ws_net_paid#2] +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(14) BroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#2, i_class#7, i_category#8] +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ws_net_paid#2, i_class#7, i_category#8] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [i_category#8, i_class#7, sum#10] + +(18) Exchange +Input [3]: [i_category#8, i_class#7, sum#10] +Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [3]: [i_category#8, i_class#7, sum#10] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) as decimal(27,2)) AS total_sum#12, i_category#8, i_class#7, 0 AS g_category#13, 0 AS g_class#14, 0 AS lochierarchy#15] + +(20) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#8, i_class#7, sum#16] + +(21) HashAggregate [codegen id : 8] +Input [3]: [i_category#8, i_class#7, sum#16] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17, i_category#8] + +(22) HashAggregate [codegen id : 8] +Input [2]: [total_sum#17, i_category#8] +Keys [1]: [i_category#8] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [3]: [i_category#8, sum#20, isEmpty#21] + +(23) Exchange +Input [3]: [i_category#8, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 9] +Input [3]: [i_category#8, sum#20, isEmpty#21] +Keys [1]: [i_category#8] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#22] +Results [6]: [sum(total_sum#17)#22 AS total_sum#23, i_category#8, null AS i_class#24, 0 AS g_category#25, 1 AS g_class#26, 1 AS lochierarchy#27] + +(25) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#8, i_class#7, sum#28] + +(26) HashAggregate [codegen id : 13] +Input [3]: [i_category#8, i_class#7, sum#28] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17] + +(27) HashAggregate [codegen id : 13] +Input [1]: [total_sum#17] +Keys: [] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [2]: [sum#31, isEmpty#32] + +(28) Exchange +Input [2]: [sum#31, isEmpty#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(29) HashAggregate [codegen id : 14] +Input [2]: [sum#31, isEmpty#32] +Keys: [] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#33] +Results [6]: [sum(total_sum#17)#33 AS total_sum#34, null AS i_category#35, null AS i_class#36, 1 AS g_category#37, 1 AS g_class#38, 2 AS lochierarchy#39] + +(30) Union + +(31) HashAggregate [codegen id : 15] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] + +(32) Exchange +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Arguments: hashpartitioning(total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(33) HashAggregate [codegen id : 16] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, CASE WHEN (g_class#14 = 0) THEN i_category#8 END AS _w0#40] + +(34) Exchange +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: hashpartitioning(lochierarchy#15, _w0#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(35) Sort [codegen id : 17] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [lochierarchy#15 ASC NULLS FIRST, _w0#40 ASC NULLS FIRST, total_sum#12 DESC NULLS LAST], false, 0 + +(36) Window +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [rank(total_sum#12) windowspecdefinition(lochierarchy#15, _w0#40, total_sum#12 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#41], [lochierarchy#15, _w0#40], [total_sum#12 DESC NULLS LAST] + +(37) Project [codegen id : 18] +Output [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Input [6]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40, rank_within_parent#41] + +(38) TakeOrderedAndProject +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Arguments: 100, [lochierarchy#15 DESC NULLS LAST, CASE WHEN (lochierarchy#15 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#41 ASC NULLS FIRST], [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/simplified.txt new file mode 100644 index 0000000000..db47334fd5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_datafusion/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (18) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (17) + Sort [lochierarchy,_w0,total_sum] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (16) + HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] [_w0] + InputAdapter + Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 + WholeStageCodegen (15) + HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + InputAdapter + Union + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] + InputAdapter + Exchange [i_category,i_class] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [ws_net_paid,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (9) + HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [i_category] #6 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 + WholeStageCodegen (14) + HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange #7 + WholeStageCodegen (13) + HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..348af132d6 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/explain.txt @@ -0,0 +1,230 @@ +== Physical Plan == +TakeOrderedAndProject (38) ++- * Project (37) + +- Window (36) + +- * Sort (35) + +- Exchange (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- Union (30) + :- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet spark_catalog.default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet spark_catalog.default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet spark_catalog.default.item (11) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * HashAggregate (26) + +- ReusedExchange (25) + + +(1) Scan parquet spark_catalog.default.web_sales +Output [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#3)] +PushedFilters: [IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3] +Condition : isnotnull(ws_item_sk#1) + +(4) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1212)) AND (d_month_seq#5 <= 1223)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#3] +Right keys [1]: [d_date_sk#4] +Join type: Inner +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#1, ws_net_paid#2] +Input [4]: [ws_item_sk#1, ws_net_paid#2, ws_sold_date_sk#3, d_date_sk#4] + +(11) Scan parquet spark_catalog.default.item +Output [3]: [i_item_sk#6, i_class#7, i_category#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Condition : isnotnull(i_item_sk#6) + +(14) BroadcastExchange +Input [3]: [i_item_sk#6, i_class#7, i_category#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#1] +Right keys [1]: [i_item_sk#6] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#2, i_class#7, i_category#8] +Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ws_net_paid#2, i_class#7, i_category#8] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum#9] +Results [3]: [i_category#8, i_class#7, sum#10] + +(18) Exchange +Input [3]: [i_category#8, i_class#7, sum#10] +Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [3]: [i_category#8, i_class#7, sum#10] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) as decimal(27,2)) AS total_sum#12, i_category#8, i_class#7, 0 AS g_category#13, 0 AS g_class#14, 0 AS lochierarchy#15] + +(20) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#8, i_class#7, sum#16] + +(21) HashAggregate [codegen id : 8] +Input [3]: [i_category#8, i_class#7, sum#16] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17, i_category#8] + +(22) HashAggregate [codegen id : 8] +Input [2]: [total_sum#17, i_category#8] +Keys [1]: [i_category#8] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [3]: [i_category#8, sum#20, isEmpty#21] + +(23) Exchange +Input [3]: [i_category#8, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(24) HashAggregate [codegen id : 9] +Input [3]: [i_category#8, sum#20, isEmpty#21] +Keys [1]: [i_category#8] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#22] +Results [6]: [sum(total_sum#17)#22 AS total_sum#23, i_category#8, null AS i_class#24, 0 AS g_category#25, 1 AS g_class#26, 1 AS lochierarchy#27] + +(25) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#8, i_class#7, sum#28] + +(26) HashAggregate [codegen id : 13] +Input [3]: [i_category#8, i_class#7, sum#28] +Keys [2]: [i_category#8, i_class#7] +Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17] + +(27) HashAggregate [codegen id : 13] +Input [1]: [total_sum#17] +Keys: [] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [2]: [sum#31, isEmpty#32] + +(28) Exchange +Input [2]: [sum#31, isEmpty#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] + +(29) HashAggregate [codegen id : 14] +Input [2]: [sum#31, isEmpty#32] +Keys: [] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#33] +Results [6]: [sum(total_sum#17)#33 AS total_sum#34, null AS i_category#35, null AS i_class#36, 1 AS g_category#37, 1 AS g_class#38, 2 AS lochierarchy#39] + +(30) Union + +(31) HashAggregate [codegen id : 15] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Functions: [] +Aggregate Attributes: [] +Results [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] + +(32) Exchange +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Arguments: hashpartitioning(total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15, 5), ENSURE_REQUIREMENTS, [plan_id=6] + +(33) HashAggregate [codegen id : 16] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Functions: [] +Aggregate Attributes: [] +Results [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, CASE WHEN (g_class#14 = 0) THEN i_category#8 END AS _w0#40] + +(34) Exchange +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: hashpartitioning(lochierarchy#15, _w0#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] + +(35) Sort [codegen id : 17] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [lochierarchy#15 ASC NULLS FIRST, _w0#40 ASC NULLS FIRST, total_sum#12 DESC NULLS LAST], false, 0 + +(36) Window +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [rank(total_sum#12) windowspecdefinition(lochierarchy#15, _w0#40, total_sum#12 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#41], [lochierarchy#15, _w0#40], [total_sum#12 DESC NULLS LAST] + +(37) Project [codegen id : 18] +Output [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Input [6]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40, rank_within_parent#41] + +(38) TakeOrderedAndProject +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Arguments: 100, [lochierarchy#15 DESC NULLS LAST, CASE WHEN (lochierarchy#15 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#41 ASC NULLS FIRST], [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..db47334fd5 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.native_iceberg_compat/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (18) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] + InputAdapter + Window [total_sum,lochierarchy,_w0] + WholeStageCodegen (17) + Sort [lochierarchy,_w0,total_sum] + InputAdapter + Exchange [lochierarchy,_w0] #1 + WholeStageCodegen (16) + HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] [_w0] + InputAdapter + Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 + WholeStageCodegen (15) + HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + InputAdapter + Union + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] + InputAdapter + Exchange [i_category,i_class] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [ws_net_paid,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (9) + HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [i_category] #6 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 + WholeStageCodegen (14) + HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange #7 + WholeStageCodegen (13) + HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/explain.txt new file mode 100644 index 0000000000..d50ed8e269 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/explain.txt @@ -0,0 +1,145 @@ +== Physical Plan == +* Sort (25) ++- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16] + +(20) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, _we0#17] + +(24) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/simplified.txt new file mode 100644 index 0000000000..729dc08617 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_datafusion/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (7) + Sort [i_category,i_class,i_item_id,i_item_desc,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (6) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/explain.txt new file mode 100644 index 0000000000..d50ed8e269 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/explain.txt @@ -0,0 +1,145 @@ +== Physical Plan == +* Sort (25) ++- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet spark_catalog.default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet spark_catalog.default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet spark_catalog.default.date_dim (10) + + +(1) Scan parquet spark_catalog.default.store_sales +Output [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ss_sold_date_sk#3)] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] +Condition : isnotnull(ss_item_sk#1) + +(4) Scan parquet spark_catalog.default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#4] +Join type: Inner +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet spark_catalog.default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 1999-02-22)) AND (d_date#11 <= 1999-03-24)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#10] +Join type: Inner +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#10] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#2, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#13] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16] + +(20) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: hashpartitioning(i_class#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] + +(21) Sort [codegen id : 5] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16] +Arguments: [sum(_w0#16) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#17], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, ((_w0#16 * 100) / _we0#17) AS revenueratio#18] +Input [8]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, _w0#16, _we0#17] + +(24) Exchange +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#15, revenueratio#18] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#18 ASC NULLS FIRST], true, 0 + diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/simplified.txt new file mode 100644 index 0000000000..729dc08617 --- /dev/null +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.native_iceberg_compat/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen (7) + Sort [i_category,i_class,i_item_id,i_item_desc,revenueratio] + InputAdapter + Exchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (6) + Project [i_item_id,i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0] + InputAdapter + Window [_w0,i_class] + WholeStageCodegen (5) + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,sum] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_ext_sales_price,ss_sold_date_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.store_sales [ss_item_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet spark_catalog.default.date_dim [d_date_sk,d_date] diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 058eb7a13d..b4724b5416 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -2218,7 +2218,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - ignore("get_struct_field - select primitive fields") { + test("get_struct_field - select primitive fields") { withTempPath { dir => // create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { @@ -2230,9 +2230,103 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { df.write.parquet(dir.toString()) } + val df = spark.read.parquet(dir.toString()).select("nested1.id") + // Comet's original scan does not support structs. + // The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch + if (!CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + checkSparkAnswerAndOperator(df) + } else { + checkSparkAnswer(df) + } + } + } + + test("get_struct_field - select subset of struct") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Add both a null struct and null inner value + .select( + when( + col("id") > 1, + struct( + when(col("id") > 2, col("id")).alias("id"), + when(col("id") > 2, struct(when(col("id") > 3, col("id")).alias("id"))) + .as("nested2"))) + .alias("nested1")) + + df.write.parquet(dir.toString()) + } + + val df = spark.read.parquet(dir.toString()) + // Comet's original scan does not support structs. + // The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch + if (!CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + checkSparkAnswerAndOperator(df.select("nested1.id")) + checkSparkAnswerAndOperator(df.select("nested1.nested2")) + checkSparkAnswerAndOperator(df.select("nested1.nested2.id")) + checkSparkAnswerAndOperator(df.select("nested1.id", "nested1.nested2.id")) + } else { + checkSparkAnswer(df.select("nested1.id")) + checkSparkAnswer(df.select("nested1.nested2")) + checkSparkAnswer(df.select("nested1.nested2.id")) + checkSparkAnswer(df.select("nested1.id", "nested1.nested2.id")) + } + } + } + + test("get_struct_field - read entire struct") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Add both a null struct and null inner value + .select( + when( + col("id") > 1, + struct( + when(col("id") > 2, col("id")).alias("id"), + when(col("id") > 2, struct(when(col("id") > 3, col("id")).alias("id"))) + .as("nested2"))) + .alias("nested1")) + + df.write.parquet(dir.toString()) + } + + val df = spark.read.parquet(dir.toString()).select("nested1.id") + // Comet's original scan does not support structs. + // The plan will have a Comet Scan only if scan impl is native_full or native_recordbatch + if (!CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + checkSparkAnswerAndOperator(df) + } else { + checkSparkAnswer(df) + } + } + } + + test("get_struct_field with DataFusion ParquetExec - simple case") { + withTempPath { dir => + // create input file with Comet disabled + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(5) + // Add both a null struct and null inner value + .select(when(col("id") > 1, struct(when(col("id") > 2, col("id")).alias("id"))) + .alias("nested1")) + + df.write.parquet(dir.toString()) + } + + Seq("parquet").foreach { v1List => + withSQLConf( + SQLConf.USE_V1_SOURCE_LIST.key -> v1List, + CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION, + CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "true") { - Seq("", "parquet").foreach { v1List => - withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { val df = spark.read.parquet(dir.toString()) checkSparkAnswerAndOperator(df.select("nested1.id")) } @@ -2240,7 +2334,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - ignore("get_struct_field - select subset of struct") { + test("get_struct_field with DataFusion ParquetExec - select subset of struct") { withTempPath { dir => // create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { @@ -2259,19 +2353,28 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { df.write.parquet(dir.toString()) } - Seq("", "parquet").foreach { v1List => - withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + Seq("parquet").foreach { v1List => + withSQLConf( + SQLConf.USE_V1_SOURCE_LIST.key -> v1List, + CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_NATIVE_SCAN_IMPL.key -> CometConf.SCAN_NATIVE_DATAFUSION, + CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "true") { + val df = spark.read.parquet(dir.toString()) + checkSparkAnswerAndOperator(df.select("nested1.id")) - checkSparkAnswerAndOperator(df.select("nested1.nested2")) - checkSparkAnswerAndOperator(df.select("nested1.nested2.id")) + checkSparkAnswerAndOperator(df.select("nested1.id", "nested1.nested2.id")) + + // unsupported cast from Int64 to Struct([Field { name: "id", data_type: Int64, ... + // checkSparkAnswerAndOperator(df.select("nested1.nested2.id")) } } } } - ignore("get_struct_field - read entire struct") { + // TODO this is not using DataFusion's ParquetExec for some reason + ignore("get_struct_field with DataFusion ParquetExec - read entire struct") { withTempPath { dir => // create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { @@ -2290,8 +2393,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { df.write.parquet(dir.toString()) } - Seq("", "parquet").foreach { v1List => - withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + Seq("parquet").foreach { v1List => + withSQLConf( + SQLConf.USE_V1_SOURCE_LIST.key -> v1List, + CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key -> "true") { + val df = spark.read.parquet(dir.toString()) checkSparkAnswerAndOperator(df.select("nested1")) } @@ -2301,12 +2408,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { ignore("read map[int, int] from parquet") { withTempPath { dir => - // create input file with Comet disabled +// create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { val df = spark .range(5) - // Spark does not allow null as a key but does allow null as a - // value, and the entire map be null +// Spark does not allow null as a key but does allow null as a +// value, and the entire map be null .select( when(col("id") > 1, map(col("id"), when(col("id") > 2, col("id")))).alias("map1")) df.write.parquet(dir.toString()) @@ -2325,12 +2432,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { ignore("read array[int] from parquet") { withTempPath { dir => - // create input file with Comet disabled +// create input file with Comet disabled withSQLConf(CometConf.COMET_ENABLED.key -> "false") { val df = spark .range(5) - // Spark does not allow null as a key but does allow null as a - // value, and the entire map be null +// Spark does not allow null as a key but does allow null as a +// value, and the entire map be null .select(when(col("id") > 1, sequence(lit(0), col("id") * 2)).alias("array1")) df.write.parquet(dir.toString()) } diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala index b6636d9c12..2d823f804d 100644 --- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala +++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStatistics, CatalogTable} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Hex} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateMode, BloomFilterAggregate} -import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometBroadcastHashJoinExec, CometCollectLimitExec, CometFilterExec, CometHashAggregateExec, CometHashJoinExec, CometProjectExec, CometScanExec, CometSortExec, CometSortMergeJoinExec, CometSparkToColumnarExec, CometTakeOrderedAndProjectExec} +import org.apache.spark.sql.comet.{CometBroadcastExchangeExec, CometBroadcastHashJoinExec, CometCollectLimitExec, CometFilterExec, CometHashAggregateExec, CometHashJoinExec, CometNativeScanExec, CometProjectExec, CometScanExec, CometSortExec, CometSortMergeJoinExec, CometSparkToColumnarExec, CometTakeOrderedAndProjectExec} import org.apache.spark.sql.comet.execution.shuffle.{CometColumnarShuffle, CometShuffleExchangeExec} import org.apache.spark.sql.execution.{CollectLimitExec, ProjectExec, SQLExecution, UnionExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec @@ -531,7 +531,8 @@ class CometExecSuite extends CometTestBase { val df = sql("SELECT * FROM tbl WHERE _2 > _3") df.collect() - val metrics = find(df.queryExecution.executedPlan)(_.isInstanceOf[CometScanExec]) + val metrics = find(df.queryExecution.executedPlan)(s => + s.isInstanceOf[CometScanExec] || s.isInstanceOf[CometNativeScanExec]) .map(_.metrics) .get @@ -1456,7 +1457,10 @@ class CometExecSuite extends CometTestBase { val projected = df.selectExpr("_1 as x") val unioned = projected.union(df) val p = unioned.queryExecution.executedPlan.find(_.isInstanceOf[UnionExec]) - assert(p.get.collectLeaves().forall(_.isInstanceOf[CometScanExec])) + assert( + p.get + .collectLeaves() + .forall(o => o.isInstanceOf[CometScanExec] || o.isInstanceOf[CometNativeScanExec])) } } } diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala index d2e71b0b66..56b3ba9d2c 100644 --- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala +++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala @@ -37,8 +37,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.{CometTestBase, DataFrame, Row} import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.comet.CometBatchScanExec -import org.apache.spark.sql.comet.CometScanExec +import org.apache.spark.sql.comet.{CometBatchScanExec, CometNativeScanExec, CometScanExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -1367,10 +1366,15 @@ abstract class ParquetReadSuite extends CometTestBase { } } - def testScanner(cometEnabled: String, scanner: String, v1: Option[String] = None): Unit = { + def testScanner( + cometEnabled: String, + cometNativeScanImpl: String, + scanner: String, + v1: Option[String] = None): Unit = { withSQLConf( CometConf.COMET_ENABLED.key -> cometEnabled, CometConf.COMET_EXEC_ENABLED.key -> cometEnabled, + CometConf.COMET_NATIVE_SCAN_IMPL.key -> cometNativeScanImpl, SQLConf.USE_V1_SOURCE_LIST.key -> v1.getOrElse("")) { withParquetTable(Seq((Long.MaxValue, 1), (Long.MaxValue, 2)), "tbl") { val df = spark.sql("select * from tbl") @@ -1414,8 +1418,11 @@ class ParquetReadV1Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper { data: Seq[T], f: Row => Boolean = _ => true): Unit = { withParquetDataFrame(data) { r => - val scans = collect(r.filter(f).queryExecution.executedPlan) { case p: CometScanExec => - p + val scans = collect(r.filter(f).queryExecution.executedPlan) { + case p: CometScanExec => + p + case p: CometNativeScanExec => + p } if (CometConf.COMET_ENABLED.get()) { assert(scans.nonEmpty) @@ -1426,9 +1433,17 @@ class ParquetReadV1Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper { } test("Test V1 parquet scan uses respective scanner") { - Seq(("false", "FileScan parquet"), ("true", "CometScan parquet")).foreach { - case (cometEnabled, expectedScanner) => - testScanner(cometEnabled, scanner = expectedScanner, v1 = Some("parquet")) + Seq( + ("false", CometConf.SCAN_NATIVE_COMET, "FileScan parquet"), + ("true", CometConf.SCAN_NATIVE_COMET, "CometScan parquet"), + ("true", CometConf.SCAN_NATIVE_DATAFUSION, "CometNativeScan"), + ("true", CometConf.SCAN_NATIVE_ICEBERG_COMPAT, "CometScan parquet")).foreach { + case (cometEnabled, cometNativeScanImpl, expectedScanner) => + testScanner( + cometEnabled, + cometNativeScanImpl, + scanner = expectedScanner, + v1 = Some("parquet")) } } } @@ -1459,7 +1474,11 @@ class ParquetReadV2Suite extends ParquetReadSuite with AdaptiveSparkPlanHelper { test("Test V2 parquet scan uses respective scanner") { Seq(("false", "BatchScan"), ("true", "CometBatchScan")).foreach { case (cometEnabled, expectedScanner) => - testScanner(cometEnabled, scanner = expectedScanner, v1 = None) + testScanner( + cometEnabled, + CometConf.SCAN_NATIVE_DATAFUSION, + scanner = expectedScanner, + v1 = None) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala b/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala index ee59e7897a..8d084fd75d 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala @@ -185,6 +185,7 @@ class CometTPCDSQuerySuite "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager") conf.set(CometConf.COMET_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_ENABLED.key, "true") + conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, "true") conf.set(CometConf.COMET_MEMORY_OVERHEAD.key, "15g") conf.set("spark.sql.adaptive.coalescePartitions.enabled", "true") diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala b/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala index c81fba3efa..0dadb22179 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala @@ -89,6 +89,7 @@ class CometTPCHQuerySuite extends QueryTest with TPCBase with ShimCometTPCHQuery "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager") conf.set(CometConf.COMET_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_ENABLED.key, "true") + conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, "true") conf.set(CometConf.COMET_SHUFFLE_MODE.key, "jvm") conf.set(MEMORY_OFFHEAP_ENABLED.key, "true") diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala index f69bb25c0d..a3906d6421 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala @@ -36,7 +36,7 @@ import org.apache.parquet.hadoop.example.ExampleParquetWriter import org.apache.parquet.schema.{MessageType, MessageTypeParser} import org.apache.spark._ import org.apache.spark.internal.config.{MEMORY_OFFHEAP_ENABLED, MEMORY_OFFHEAP_SIZE, SHUFFLE_MANAGER} -import org.apache.spark.sql.comet.{CometBatchScanExec, CometBroadcastExchangeExec, CometColumnarToRowExec, CometExec, CometScanExec, CometScanWrapper, CometSinkPlaceHolder, CometSparkToColumnarExec} +import org.apache.spark.sql.comet.{CometBatchScanExec, CometBroadcastExchangeExec, CometColumnarToRowExec, CometExec, CometNativeScanExec, CometScanExec, CometScanWrapper, CometSinkPlaceHolder, CometSparkToColumnarExec} import org.apache.spark.sql.comet.execution.shuffle.{CometColumnarShuffle, CometNativeShuffle, CometShuffleExchangeExec} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExtendedMode, InputAdapter, SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -80,6 +80,7 @@ abstract class CometTestBase conf.set(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, "true") conf.set(CometConf.COMET_SHUFFLE_FALLBACK_TO_COLUMNAR.key, "true") conf.set(CometConf.COMET_SPARK_TO_ARROW_ENABLED.key, "true") + conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true") conf.set(CometConf.COMET_MEMORY_OVERHEAD.key, "2g") conf.set(CometConf.COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED.key, "true") conf @@ -173,7 +174,7 @@ abstract class CometTestBase protected def checkCometOperators(plan: SparkPlan, excludedClasses: Class[_]*): Unit = { val wrapped = wrapCometSparkToColumnar(plan) wrapped.foreach { - case _: CometScanExec | _: CometBatchScanExec => + case _: CometNativeScanExec | _: CometScanExec | _: CometBatchScanExec => case _: CometSinkPlaceHolder | _: CometScanWrapper => case _: CometColumnarToRowExec => case _: CometSparkToColumnarExec => diff --git a/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala b/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala index 702c18b00b..871132bd7c 100644 --- a/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/comet/CometPlanStabilitySuite.scala @@ -77,11 +77,15 @@ trait CometPlanStabilitySuite extends DisableAdaptiveExecutionSuite with TPCDSBa private val approvedAnsiPlans: Seq[String] = Seq("q83", "q83.sf100") private def getDirForTest(name: String): File = { - val goldenFileName = if (SQLConf.get.ansiEnabled && approvedAnsiPlans.contains(name)) { + var goldenFileName = if (SQLConf.get.ansiEnabled && approvedAnsiPlans.contains(name)) { name + ".ansi" } else { name } + val nativeImpl = CometConf.COMET_NATIVE_SCAN_IMPL.get() + if (!nativeImpl.equals(CometConf.SCAN_NATIVE_COMET)) { + goldenFileName = s"$goldenFileName.$nativeImpl" + } new File(goldenFilePath, goldenFileName) } @@ -91,8 +95,8 @@ trait CometPlanStabilitySuite extends DisableAdaptiveExecutionSuite with TPCDSBa actualExplain: String): Boolean = { val simplifiedFile = new File(dir, "simplified.txt") val expectedSimplified = FileUtils.readFileToString(simplifiedFile, StandardCharsets.UTF_8) - lazy val explainFile = new File(dir, "explain.txt") - lazy val expectedExplain = FileUtils.readFileToString(explainFile, StandardCharsets.UTF_8) + val explainFile = new File(dir, "explain.txt") + val expectedExplain = FileUtils.readFileToString(explainFile, StandardCharsets.UTF_8) expectedSimplified == actualSimplifiedPlan && expectedExplain == actualExplain } @@ -257,11 +261,18 @@ trait CometPlanStabilitySuite extends DisableAdaptiveExecutionSuite with TPCDSBa val queryString = resourceToString( s"$tpcdsGroup/$query.sql", classLoader = Thread.currentThread().getContextClassLoader) + + // Comet does not yet support DPP yet with full native scan enabled + // https://github.com/apache/datafusion-comet/issues/121 + val dppEnabled = CometConf.COMET_NATIVE_SCAN_IMPL.get() == CometConf.SCAN_NATIVE_COMET + // Disable char/varchar read-side handling for better performance. withSQLConf( CometConf.COMET_ENABLED.key -> "true", + CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", CometConf.COMET_DPP_FALLBACK_ENABLED.key -> "false", + SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> dppEnabled.toString, CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true", CometConf.COMET_SHUFFLE_FALLBACK_TO_COLUMNAR.key -> "true", CometConf.COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED.key -> "true", @@ -292,6 +303,7 @@ trait CometPlanStabilitySuite extends DisableAdaptiveExecutionSuite with TPCDSBa conf.set(MEMORY_OFFHEAP_SIZE.key, "2g") conf.set(CometConf.COMET_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_ENABLED.key, "true") + conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true") conf.set(CometConf.COMET_MEMORY_OVERHEAD.key, "1g") conf.set(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, "true") @@ -314,9 +326,11 @@ class CometTPCDSV1_4_PlanStabilitySuite extends CometPlanStabilitySuite { override val goldenFilePath: String = new File(baseResourcePath, planName).getAbsolutePath - tpcdsQueries.foreach { q => - test(s"check simplified (tpcds-v1.4/$q)") { - testQuery("tpcds", q) + if (CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + tpcdsQueries.foreach { q => + test(s"check simplified (tpcds-v1.4/$q)") { + testQuery("tpcds", q) + } } } } @@ -332,9 +346,11 @@ class CometTPCDSV2_7_PlanStabilitySuite extends CometPlanStabilitySuite { override val goldenFilePath: String = new File(baseResourcePath, planName).getAbsolutePath - tpcdsQueriesV2_7_0.foreach { q => - test(s"check simplified (tpcds-v2.7.0/$q)") { - testQuery("tpcds-v2.7.0", q) + if (CometConf.COMET_NATIVE_SCAN_IMPL.get().equals(CometConf.SCAN_NATIVE_COMET)) { + tpcdsQueriesV2_7_0.foreach { q => + test(s"check simplified (tpcds-v2.7.0/$q)") { + testQuery("tpcds-v2.7.0", q) + } } } }